LLVM  16.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to NVPTX assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXAsmPrinter.h"
19 #include "NVPTX.h"
20 #include "NVPTXMCExpr.h"
22 #include "NVPTXRegisterInfo.h"
23 #include "NVPTXSubtarget.h"
24 #include "NVPTXTargetMachine.h"
25 #include "NVPTXUtilities.h"
27 #include "cl_common_defines.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/DenseSet.h"
32 #include "llvm/ADT/SmallString.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/StringExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/Triple.h"
37 #include "llvm/ADT/Twine.h"
39 #include "llvm/CodeGen/Analysis.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/Constant.h"
53 #include "llvm/IR/Constants.h"
54 #include "llvm/IR/DataLayout.h"
55 #include "llvm/IR/DebugInfo.h"
57 #include "llvm/IR/DebugLoc.h"
58 #include "llvm/IR/DerivedTypes.h"
59 #include "llvm/IR/Function.h"
60 #include "llvm/IR/GlobalValue.h"
61 #include "llvm/IR/GlobalVariable.h"
62 #include "llvm/IR/Instruction.h"
63 #include "llvm/IR/LLVMContext.h"
64 #include "llvm/IR/Module.h"
65 #include "llvm/IR/Operator.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/MC/MCExpr.h"
69 #include "llvm/MC/MCInst.h"
70 #include "llvm/MC/MCInstrDesc.h"
71 #include "llvm/MC/MCStreamer.h"
72 #include "llvm/MC/MCSymbol.h"
73 #include "llvm/MC/TargetRegistry.h"
74 #include "llvm/Support/Casting.h"
76 #include "llvm/Support/Endian.h"
80 #include "llvm/Support/Path.h"
85 #include <cassert>
86 #include <cstdint>
87 #include <cstring>
88 #include <new>
89 #include <string>
90 #include <utility>
91 #include <vector>
92 
93 using namespace llvm;
94 
95 #define DEPOTNAME "__local_depot"
96 
97 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
98 /// depends.
// The result is accumulated in Globals: when V is itself a GlobalVariable it
// is inserted directly; otherwise, when V is a User, each of its operands is
// searched recursively. Values that are neither contribute nothing.
// NOTE(review): the declarator line(s) carrying the parameter list are absent
// from this listing; the names V and Globals are taken from the body.
99 static void
102  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
103  Globals.insert(GV);
104  else {
105  if (const User *U = dyn_cast<User>(V)) {
106  for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
107  DiscoverDependentGlobals(U->getOperand(i), Globals);
108  }
109  }
110  }
111 }
112 
113 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
114 /// instances to be emitted, but only after any dependents have been added
115 /// first.
// Visited holds the globals already appended to Order. Visiting holds the
// globals whose processing has started but not finished; meeting one of those
// again means the dependencies form a cycle, which is a fatal error.
// NOTE(review): the declarator lines (parameter list) and the declaration of
// Others are absent from this listing.
116 static void
121  // Have we already visited this one?
122  if (Visited.count(GV))
123  return;
124 
125  // Do we have a circular dependency?
 // insert() reports through .second whether GV was newly added; false means
 // GV is already being processed further up the recursion.
126  if (!Visiting.insert(GV).second)
127  report_fatal_error("Circular dependency found in global variable set");
128 
129  // Make sure we visit all dependents first
131  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
132  DiscoverDependentGlobals(GV->getOperand(i), Others);
133 
134  for (const GlobalVariable *GV : Others)
135  VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
136 
137  // Now we can visit ourself
138  Order.push_back(GV);
139  Visited.insert(GV);
140  Visiting.erase(GV);
141 }
142 
143 void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
144  NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
145  getSubtargetInfo().getFeatureBits());
146 
147  MCInst Inst;
148  lowerToMCInst(MI, Inst);
149  EmitToStreamer(*OutStreamer, Inst);
150 }
151 
152 // Handle symbol backtracking for targets that do not support image handles
153 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
154  unsigned OpNo, MCOperand &MCOp) {
155  const MachineOperand &MO = MI->getOperand(OpNo);
156  const MCInstrDesc &MCID = MI->getDesc();
157 
158  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
159  // This is a texture fetch, so operand 4 is a texref and operand 5 is
160  // a samplerref
161  if (OpNo == 4 && MO.isImm()) {
162  lowerImageHandleSymbol(MO.getImm(), MCOp);
163  return true;
164  }
165  if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
166  lowerImageHandleSymbol(MO.getImm(), MCOp);
167  return true;
168  }
169 
170  return false;
171  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
172  unsigned VecSize =
173  1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
174 
175  // For a surface load of vector size N, the Nth operand will be the surfref
176  if (OpNo == VecSize && MO.isImm()) {
177  lowerImageHandleSymbol(MO.getImm(), MCOp);
178  return true;
179  }
180 
181  return false;
182  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
183  // This is a surface store, so operand 0 is a surfref
184  if (OpNo == 0 && MO.isImm()) {
185  lowerImageHandleSymbol(MO.getImm(), MCOp);
186  return true;
187  }
188 
189  return false;
190  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
191  // This is a query, so operand 1 is a surfref/texref
192  if (OpNo == 1 && MO.isImm()) {
193  lowerImageHandleSymbol(MO.getImm(), MCOp);
194  return true;
195  }
196 
197  return false;
198  }
199 
200  return false;
201 }
202 
// Store in MCOp a symbol reference for the image handle numbered Index.
// The symbol name comes from MFI->getImageHandleSymbol(Index) and is passed
// through the target machine's managed string pool before the MCSymbol is
// looked up or created in OutContext.
// NOTE(review): the declaration of MFI is absent from this listing.
203 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
204  // Ewwww
 // The const_cast strips constness from MF->getTarget() to obtain the
 // NVPTXTargetMachine whose managed string pool is used below.
205  LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
206  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
208  const char *Sym = MFI->getImageHandleSymbol(Index);
209  std::string *SymNamePtr =
210  nvTM.getManagedStrPool()->getManagedString(Sym);
211  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
212 }
213 
// Build OutMI from MI: the opcode is copied, then the operands are lowered
// one by one. CALL_PROTOTYPE is special-cased: only its operand 0 is added,
// as a symbol reference, and the function returns.
// NOTE(review): the line holding the argument of GetSymbolRef in the
// CALL_PROTOTYPE branch is absent from this listing.
214 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
215  OutMI.setOpcode(MI->getOpcode());
216  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
217  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
218  const MachineOperand &MO = MI->getOperand(0);
219  OutMI.addOperand(GetSymbolRef(
221  return;
222  }
223 
224  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
225  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
226  const MachineOperand &MO = MI->getOperand(i);
227 
228  MCOperand MCOp;
 // Without image-handle support on the subtarget, image handle operands are
 // lowered to symbols first; operands handled that way skip lowerOperand.
229  if (!STI.hasImageHandles()) {
230  if (lowerImageHandleOperand(MI, i, MCOp)) {
231  OutMI.addOperand(MCOp);
232  continue;
233  }
234  }
235 
 // An operand is only added when lowerOperand reports success.
236  if (lowerOperand(MO, MCOp))
237  OutMI.addOperand(MCOp);
238  }
239 }
240 
// Translate the machine operand MO into MCOp, dispatching on MO.getType().
// Every path that returns normally returns true. An operand type with no
// case hits llvm_unreachable, and a floating-point immediate whose type is
// not half, float or double is a fatal error.
// NOTE(review): the `case` labels of the outer switch and several
// continuation lines are absent from this listing, so which operand type
// each visible statement belongs to cannot be confirmed from here.
241 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
242  MCOperand &MCOp) {
243  switch (MO.getType()) {
244  default: llvm_unreachable("unknown operand type");
246  MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
247  break;
249  MCOp = MCOperand::createImm(MO.getImm());
250  break;
253  MO.getMBB()->getSymbol(), OutContext));
254  break;
257  break;
259  MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
260  break;
262  const ConstantFP *Cnt = MO.getFPImm();
263  const APFloat &Val = Cnt->getValueAPF();
264 
 // The expression built for the operand depends on the FP type of the
 // constant.
265  switch (Cnt->getType()->getTypeID()) {
266  default: report_fatal_error("Unsupported FP type"); break;
267  case Type::HalfTyID:
268  MCOp = MCOperand::createExpr(
270  break;
271  case Type::FloatTyID:
272  MCOp = MCOperand::createExpr(
274  break;
275  case Type::DoubleTyID:
276  MCOp = MCOperand::createExpr(
278  break;
279  }
280  break;
281  }
282  }
283  return true;
284 }
285 
// Pack a register into a single unsigned value for the MC layer.
// Registers that have a register class are encoded as a class ID (1..8) in
// the upper 4 bits and the per-class register number from VRegMapping in the
// lower 28 bits. The final branch keeps only the low 28 bits of Reg, which
// leaves class ID 0. A register class not in the list below is a fatal error.
// NOTE(review): the line opening the `if` that the final `else` belongs to is
// absent from this listing.
286 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
288  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
289 
 // operator[] is used for both lookups, so a missing entry is
 // default-constructed rather than reported.
290  DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
291  unsigned RegNum = RegMap[Reg];
292 
293  // Encode the register class in the upper 4 bits
294  // Must be kept in sync with NVPTXInstPrinter::printRegName
295  unsigned Ret = 0;
296  if (RC == &NVPTX::Int1RegsRegClass) {
297  Ret = (1 << 28);
298  } else if (RC == &NVPTX::Int16RegsRegClass) {
299  Ret = (2 << 28);
300  } else if (RC == &NVPTX::Int32RegsRegClass) {
301  Ret = (3 << 28);
302  } else if (RC == &NVPTX::Int64RegsRegClass) {
303  Ret = (4 << 28);
304  } else if (RC == &NVPTX::Float32RegsRegClass) {
305  Ret = (5 << 28);
306  } else if (RC == &NVPTX::Float64RegsRegClass) {
307  Ret = (6 << 28);
308  } else if (RC == &NVPTX::Float16RegsRegClass) {
309  Ret = (7 << 28);
310  } else if (RC == &NVPTX::Float16x2RegsRegClass) {
311  Ret = (8 << 28);
312  } else {
313  report_fatal_error("Bad register class");
314  }
315 
316  // Insert the vreg number
317  Ret |= (RegNum & 0x0FFFFFFF);
318  return Ret;
319  } else {
320  // Some special-use registers are actually physical registers.
321  // Encode this as the register class ID of 0 and the real register ID.
322  return Reg & 0x0FFFFFFF;
323  }
324 }
325 
// Wraps an MCExpr, created with OutContext, in an expression MCOperand and
// returns it.
// NOTE(review): the signature line and the line that starts the assignment
// to Expr are absent from this listing.
327  const MCExpr *Expr;
329  OutContext);
330  return MCOperand::createExpr(Expr);
331 }
332 
// Print the PTX return-value declaration of F to O.
// Nothing is printed for a void return type. Otherwise the text is wrapped
// in " (" ... ") ". When isABI is set (SM version >= 20) one `.param` named
// func_retval0 is printed, sized by the return type. Otherwise one `.reg` is
// printed per scalar element of the return value, named func_retval0,
// func_retval1, ... and separated by ", ".
// NOTE(review): two lines inside the scalar ABI branch (after the assert and
// after the "at least 32 bits" comment) are absent from this listing.
333 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
334  const DataLayout &DL = getDataLayout();
335  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
336  const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
337 
338  Type *Ty = F->getReturnType();
339 
340  bool isABI = (STI.getSmVersion() >= 20);
341 
342  if (Ty->getTypeID() == Type::VoidTyID)
343  return;
344 
345  O << " (";
346 
347  if (isABI) {
 // Scalar case: floating point, or an integer other than i128.
348  if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
349  unsigned size = 0;
350  if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
351  size = ITy->getBitWidth();
352  } else {
353  assert(Ty->isFloatingPointTy() && "Floating point type expected here");
355  }
356  // PTX ABI requires all scalar return values to be at least 32
357  // bits in size. fp16 normally uses .b16 as its storage type in
358  // PTX, so its size must be adjusted here, too.
360 
361  O << ".param .b" << size << " func_retval0";
362  } else if (isa<PointerType>(Ty)) {
363  O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
364  << " func_retval0";
365  } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
 // Aggregates, vectors and i128 are returned as an aligned byte array of
 // the type's alloc size.
366  unsigned totalsz = DL.getTypeAllocSize(Ty);
367  unsigned retAlignment = 0;
 // Fall back to the target lowering's alignment when getAlign() does not
 // provide one for index 0.
368  if (!getAlign(*F, 0, retAlignment))
369  retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value();
370  O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
371  << "]";
372  } else
373  llvm_unreachable("Unknown return type");
374  } else {
375  SmallVector<EVT, 16> vtparts;
376  ComputeValueVTs(*TLI, DL, Ty, vtparts);
 // idx numbers the emitted registers consecutively across all parts.
377  unsigned idx = 0;
378  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
379  unsigned elems = 1;
380  EVT elemtype = vtparts[i];
381  if (vtparts[i].isVector()) {
382  elems = vtparts[i].getVectorNumElements();
383  elemtype = vtparts[i].getVectorElementType();
384  }
385 
386  for (unsigned j = 0, je = elems; j != je; ++j) {
387  unsigned sz = elemtype.getSizeInBits();
388  if (elemtype.isInteger())
389  sz = promoteScalarArgumentSize(sz);
390  O << ".reg .b" << sz << " func_retval" << idx;
391  if (j < je - 1)
392  O << ", ";
393  ++idx;
394  }
395  if (i < e - 1)
396  O << ", ";
397  }
398  }
399  O << ") ";
400 }
401 
402 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
403  raw_ostream &O) {
404  const Function &F = MF.getFunction();
405  printReturnValStr(&F, O);
406 }
407 
408 // Return true if MBB is the header of a loop marked with
409 // llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
410 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
411  const MachineBasicBlock &MBB) const {
412  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
413  // We insert .pragma "nounroll" only to the loop header.
414  if (!LI.isLoopHeader(&MBB))
415  return false;
416 
417  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
418  // we iterate through each back edge of the loop with header MBB, and check
419  // whether its metadata contains llvm.loop.unroll.disable.
420  for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
421  if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
422  // Edges from other loops to MBB are not back edges.
423  continue;
424  }
425  if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
426  if (MDNode *LoopID =
427  PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
428  if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
429  return true;
430  if (MDNode *UnrollCountMD =
431  GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
432  if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
433  ->getZExtValue() == 1)
434  return true;
435  }
436  }
437  }
438  }
439  return false;
440 }
441 
// Emits a `.pragma "nounroll";` line at the start of MBB when MBB is the
// header of a loop marked as not to be unrolled.
// NOTE(review): one line preceding the check is absent from this listing.
442 void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
444  if (isLoopHeaderOfNoUnroll(MBB))
445  OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
446 }
447 
// Emit the header of the current function. In order: the module-level globals
// if they have not been emitted yet, the linkage directive, `.entry` or
// `.func` (with the return value for `.func`), the function symbol, the
// parameter list, the kernel directives for kernels, and the opening brace.
// setAndEmitFunctionVirtualRegisters is then called for the function.
// NOTE(review): the statement guarded by the final `if` is absent from this
// listing.
448 void NVPTXAsmPrinter::emitFunctionEntryLabel() {
449  SmallString<128> Str;
450  raw_svector_ostream O(Str);
451 
 // Globals are emitted once per module, triggered by the first function.
452  if (!GlobalsEmitted) {
453  emitGlobals(*MF->getFunction().getParent());
454  GlobalsEmitted = true;
455  }
456 
457  // Set up
458  MRI = &MF->getRegInfo();
459  F = &MF->getFunction();
460  emitLinkageDirective(F, O);
461  if (isKernelFunction(*F))
462  O << ".entry ";
463  else {
464  O << ".func ";
465  printReturnValStr(*MF, O);
466  }
467 
468  CurrentFnSym->print(O, MAI);
469 
470  emitFunctionParamList(*MF, O);
471 
472  if (isKernelFunction(*F))
473  emitKernelFunctionDirectives(*F, O);
474 
475  OutStreamer->emitRawText(O.str());
476 
 // Start the function with an empty virtual-register mapping.
477  VRegMapping.clear();
478  // Emit open brace for function body.
479  OutStreamer->emitRawText(StringRef("{\n"));
480  setAndEmitFunctionVirtualRegisters(*MF);
481  // Emit initial .loc debug directive for correct relocation symbol data.
482  if (MMI && MMI->hasDebugInfo())
484 }
485 
// Runs the base-class AsmPrinter::runOnMachineFunction, then emits the "}"
// that closes the function body, and returns the base-class result.
// NOTE(review): the signature line is absent from this listing.
487  bool Result = AsmPrinter::runOnMachineFunction(F);
488  // Emit closing brace for the body of function F.
489  // The closing brace must be emitted here because we need to emit additional
490  // debug labels/data after the last basic block.
491  // We need to emit the closing brace here because we don't have a function
492  // that finishes emission of the function body.
493  OutStreamer->emitRawText(StringRef("}\n"));
494  return Result;
495 }
496 
497 void NVPTXAsmPrinter::emitFunctionBodyStart() {
498  SmallString<128> Str;
499  raw_svector_ostream O(Str);
500  emitDemotedVars(&MF->getFunction(), O);
501  OutStreamer->emitRawText(O.str());
502 }
503 
504 void NVPTXAsmPrinter::emitFunctionBodyEnd() {
505  VRegMapping.clear();
506 }
507 
// Returns the MCSymbol that OutContext holds (or creates) for the name
// stored in Str.
// NOTE(review): the signature line and the line that fills Str are absent
// from this listing.
509  SmallString<128> Str;
511  return OutContext.getOrCreateSymbol(Str);
512 }
513 
514 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
515  Register RegNo = MI->getOperand(0).getReg();
516  if (Register::isVirtualRegister(RegNo)) {
517  OutStreamer->AddComment(Twine("implicit-def: ") +
518  getVirtualRegisterName(RegNo));
519  } else {
520  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
521  OutStreamer->AddComment(Twine("implicit-def: ") +
522  STI.getRegisterInfo()->getName(RegNo));
523  }
524  OutStreamer->addBlankLine();
525 }
526 
527 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
528  raw_ostream &O) const {
529  // If the NVVM IR has some of reqntid* specified, then output
530  // the reqntid directive, and set the unspecified ones to 1.
531  // If none of reqntid* is specified, don't output reqntid directive.
532  unsigned reqntidx, reqntidy, reqntidz;
533  bool specified = false;
534  if (!getReqNTIDx(F, reqntidx))
535  reqntidx = 1;
536  else
537  specified = true;
538  if (!getReqNTIDy(F, reqntidy))
539  reqntidy = 1;
540  else
541  specified = true;
542  if (!getReqNTIDz(F, reqntidz))
543  reqntidz = 1;
544  else
545  specified = true;
546 
547  if (specified)
548  O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
549  << "\n";
550 
551  // If the NVVM IR has some of maxntid* specified, then output
552  // the maxntid directive, and set the unspecified ones to 1.
553  // If none of maxntid* is specified, don't output maxntid directive.
554  unsigned maxntidx, maxntidy, maxntidz;
555  specified = false;
556  if (!getMaxNTIDx(F, maxntidx))
557  maxntidx = 1;
558  else
559  specified = true;
560  if (!getMaxNTIDy(F, maxntidy))
561  maxntidy = 1;
562  else
563  specified = true;
564  if (!getMaxNTIDz(F, maxntidz))
565  maxntidz = 1;
566  else
567  specified = true;
568 
569  if (specified)
570  O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
571  << "\n";
572 
573  unsigned mincta;
574  if (getMinCTASm(F, mincta))
575  O << ".minnctapersm " << mincta << "\n";
576 
577  unsigned maxnreg;
578  if (getMaxNReg(F, maxnreg))
579  O << ".maxnreg " << maxnreg << "\n";
580 }
581 
// Returns the printed name of a virtual register: the register-class string
// from getNVPTXRegClassStr followed by the number that VRegMapping assigns to
// the register within its class. Both lookups are checked with assert only.
// NOTE(review): the line carrying the function name and parameter, and the
// declaration of the iterator VI, are absent from this listing.
582 std::string
584  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
585 
586  std::string Name;
587  raw_string_ostream NameStr(Name);
588 
589  VRegRCMap::const_iterator I = VRegMapping.find(RC);
590  assert(I != VRegMapping.end() && "Bad register class");
591  const DenseMap<unsigned, unsigned> &RegMap = I->second;
592 
594  assert(VI != RegMap.end() && "Bad virtual register");
595  unsigned MappedVR = VI->second;
596 
597  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
598 
 // Flush the stream so that Name holds the complete text before returning.
599  NameStr.flush();
600  return Name;
601 }
602 
603 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
604  raw_ostream &O) {
605  O << getVirtualRegisterName(vr);
606 }
607 
608 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
609  emitLinkageDirective(F, O);
610  if (isKernelFunction(*F))
611  O << ".entry ";
612  else
613  O << ".func ";
614  printReturnValStr(F, O);
615  getSymbol(F)->print(O, MAI);
616  O << "\n";
617  emitFunctionParamList(F, O);
618  O << ";\n";
619 }
620 
621 static bool usedInGlobalVarDef(const Constant *C) {
622  if (!C)
623  return false;
624 
625  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
626  return GV->getName() != "llvm.used";
627  }
628 
629  for (const User *U : C->users())
630  if (const Constant *C = dyn_cast<Constant>(U))
631  if (usedInGlobalVarDef(C))
632  return true;
633 
634  return false;
635 }
636 
637 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
638  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
639  if (othergv->getName() == "llvm.used")
640  return true;
641  }
642 
643  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
644  if (instr->getParent() && instr->getParent()->getParent()) {
645  const Function *curFunc = instr->getParent()->getParent();
646  if (oneFunc && (curFunc != oneFunc))
647  return false;
648  oneFunc = curFunc;
649  return true;
650  } else
651  return false;
652  }
653 
654  for (const User *UU : U->users())
655  if (!usedInOneFunc(UU, oneFunc))
656  return false;
657 
658  return true;
659 }
660 
661 /* Find out if a global variable can be demoted to local scope.
662  * Currently, this is valid for CUDA shared variables, which have local
663  * scope and global lifetime. So the conditions to check are :
664  * 1. Is the global variable in shared address space?
665  * 2. Does it have internal linkage?
666  * 3. Is the global variable referenced only in one function?
667  */
// On success the single function that references gv is stored in f and true
// is returned; f is left untouched when false is returned.
// NOTE(review): the line holding the condition checked on Pty is absent from
// this listing.
668 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
669  if (!gv->hasInternalLinkage())
670  return false;
671  PointerType *Pty = gv->getType();
673  return false;
674 
675  const Function *oneFunc = nullptr;
676 
677  bool flag = usedInOneFunc(gv, oneFunc);
678  if (!flag)
679  return false;
 // usedInOneFunc can succeed without having seen any function, in which case
 // there is nothing to demote into.
680  if (!oneFunc)
681  return false;
682  f = oneFunc;
683  return true;
684 }
685 
// Returns true when C is used, directly or through a chain of constant users,
// by an instruction whose enclosing function is a key of seenMap.
// Instructions without a parent block or parent function are skipped.
// NOTE(review): the line declaring the seenMap parameter is absent from this
// listing.
686 static bool useFuncSeen(const Constant *C,
688  for (const User *U : C->users()) {
689  if (const Constant *cu = dyn_cast<Constant>(U)) {
690  if (useFuncSeen(cu, seenMap))
691  return true;
692  } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
693  const BasicBlock *bb = I->getParent();
694  if (!bb)
695  continue;
696  const Function *caller = bb->getParent();
697  if (!caller)
698  continue;
699  if (seenMap.find(caller) != seenMap.end())
700  return true;
701  }
702  }
703  return false;
704 }
705 
// Print to O the forward declarations needed by the functions of M, walking
// the functions in module order. A declaration is printed for:
//  - every function with the "nvptx-libcall-callee" attribute;
//  - every function declaration that has uses and is not an intrinsic;
//  - every function definition used in a global variable definition, or used
//    by a function already recorded in seenMap.
// Each function definition is recorded in seenMap once it has been processed.
// NOTE(review): the declaration of seenMap is absent from this listing.
706 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
708  for (const Function &F : M) {
709  if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
710  emitDeclaration(&F, O);
711  continue;
712  }
713 
714  if (F.isDeclaration()) {
715  if (F.use_empty())
716  continue;
717  if (F.getIntrinsicID())
718  continue;
719  emitDeclaration(&F, O);
720  continue;
721  }
 // F is a definition: one declaration at most is printed, hence the `break`
 // after each emitDeclaration below.
722  for (const User *U : F.users()) {
723  if (const Constant *C = dyn_cast<Constant>(U)) {
724  if (usedInGlobalVarDef(C)) {
725  // The use is in the initialization of a global variable
726  // that is a function pointer, so print a declaration
727  // for the original function
728  emitDeclaration(&F, O);
729  break;
730  }
731  // Emit a declaration of this function if the function that
732  // uses this constant expr has already been seen.
733  if (useFuncSeen(C, seenMap)) {
734  emitDeclaration(&F, O);
735  break;
736  }
737  }
738 
739  if (!isa<Instruction>(U))
740  continue;
741  const Instruction *instr = cast<Instruction>(U);
742  const BasicBlock *bb = instr->getParent();
743  if (!bb)
744  continue;
745  const Function *caller = bb->getParent();
746  if (!caller)
747  continue;
748 
749  // If a caller has already been seen, then the caller is
750  // appearing in the module before the callee. so print out
751  // a declaration for the callee.
752  if (seenMap.find(caller) != seenMap.end()) {
753  emitDeclaration(&F, O);
754  break;
755  }
756  }
757  seenMap[&F] = true;
758  }
759 }
760 
// Returns true when GV is null, when its initializer is not a ConstantArray,
// or when that array has no operands; false only for a non-empty array.
// NOTE(review): the signature line is absent from this listing.
762  if (!GV) return true;
763  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
764  if (!InitList) return true; // Not an array; we don't know how to parse.
765  return InitList->getNumOperands() == 0;
766 }
767 
768 void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
769  // Construct a default subtarget off of the TargetMachine defaults. The
770  // rest of NVPTX isn't friendly to change subtargets per function and
771  // so the default TargetMachine will have all of the options.
772  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
773  const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
774  SmallString<128> Str1;
775  raw_svector_ostream OS1(Str1);
776 
777  // Emit header before any dwarf directives are emitted below.
778  emitHeader(M, OS1, *STI);
779  OutStreamer->emitRawText(OS1.str());
780 }
781 
// Rejects modules that have aliases, or non-empty llvm.global_ctors /
// llvm.global_dtors lists. Otherwise runs AsmPrinter::doInitialization,
// resets GlobalsEmitted and returns the base-class result.
// NOTE(review): the signature line and the lines that open the two calls
// taking the ctor/dtor messages are absent from this listing.
783  if (M.alias_size()) {
784  report_fatal_error("Module has aliases, which NVPTX does not support.");
785  return true; // error
786  }
787  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
789  "Module has a nontrivial global ctor, which NVPTX does not support.");
790  return true; // error
791  }
792  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
794  "Module has a nontrivial global dtor, which NVPTX does not support.");
795  return true; // error
796  }
797 
798  // We need to call the parent's one explicitly.
799  bool Result = AsmPrinter::doInitialization(M);
800 
 // Globals are emitted later, by emitFunctionEntryLabel or doFinalization.
801  GlobalsEmitted = false;
802 
803  return Result;
804 }
805 
// Emit the module-level text for M as one raw-text chunk: first the function
// declarations from emitDeclarations, then every global variable, ordered by
// VisitGlobalVariableForEmission, then a newline.
// NOTE(review): the declarations of Globals, GVVisited and GVVisiting are
// absent from this listing.
806 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
807  SmallString<128> Str2;
808  raw_svector_ostream OS2(Str2);
809 
810  emitDeclarations(M, OS2);
811 
812  // As ptxas does not support forward references of globals, we need to first
813  // sort the list of module-level globals in def-use order. We visit each
814  // global variable in order, and ensure that we emit it *after* its dependent
815  // globals. We use a little extra memory maintaining both a set and a list to
816  // have fast searches while maintaining a strict ordering.
820 
821  // Visit each global variable, in order
822  for (const GlobalVariable &I : M.globals())
823  VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
824 
825  assert(GVVisited.size() == M.getGlobalList().size() &&
826  "Missed a global variable");
827  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
828 
829  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
830  const NVPTXSubtarget &STI =
831  *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
832 
833  // Print out module-level global variables in proper order
834  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
835  printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
836 
837  OS2 << '\n';
838 
839  OutStreamer->emitRawText(OS2.str());
840 }
841 
// Print the PTX file header to O: a banner comment, `.version`, `.target`
// (with optional ", texmode_independent" and ", debug" suffixes) and
// `.address_size`.
// NOTE(review): the `case` labels of the switch over the compile units'
// emission kind are absent from this listing.
842 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
843  const NVPTXSubtarget &STI) {
844  O << "//\n";
845  O << "// Generated by LLVM NVPTX Back-End\n";
846  O << "//\n";
847  O << "\n";
848 
 // The PTX version is printed as major.minor, split at the last decimal
 // digit of getPTXVersion().
849  unsigned PTXVersion = STI.getPTXVersion();
850  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
851 
852  O << ".target ";
853  O << STI.getTargetName();
854 
855  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
856  if (NTM.getDrvInterface() == NVPTX::NVCL)
857  O << ", texmode_independent";
858 
 // Stop scanning the compile units at the first one that sets
 // HasFullDebugInfo.
859  bool HasFullDebugInfo = false;
860  for (DICompileUnit *CU : M.debug_compile_units()) {
861  switch(CU->getEmissionKind()) {
864  break;
867  HasFullDebugInfo = true;
868  break;
869  }
870  if (HasFullDebugInfo)
871  break;
872  }
873  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
874  O << ", debug";
875 
876  O << "\n";
877 
878  O << ".address_size ";
879  if (NTM.is64Bit())
880  O << "64";
881  else
882  O << "32";
883  O << "\n";
884 
885  O << "\n";
886 }
887 
// Module finalization: emits the module-level globals if no function
// triggered that earlier, closes the last section and emits an empty
// .debug_loc section when debug info is present, and outputs the remaining
// DWARF .file directives through the NVPTX target streamer.
// NOTE(review): the signature line and the lines between "call
// doFinalization" and the TS declaration (including the definition of `ret`)
// are absent from this listing.
889  bool HasDebugInfo = MMI && MMI->hasDebugInfo();
890 
891  // If we did not emit any functions, then the global declarations have not
892  // yet been emitted.
893  if (!GlobalsEmitted) {
894  emitGlobals(M);
895  GlobalsEmitted = true;
896  }
897 
898  // call doFinalization
900 
902 
903  auto *TS =
904  static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
905  // Close the last emitted section
906  if (HasDebugInfo) {
907  TS->closeLastSection();
908  // Emit empty .debug_loc section for better support of the empty files.
909  OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
910  }
911 
912  // Output last DWARF .file directives, if any.
913  TS->outputDwarfFileDirectives();
914 
915  return ret;
916 
 // Everything below the return is commented-out legacy code and notes.
917  //bool Result = AsmPrinter::doFinalization(M);
918  // Instead of calling the parents doFinalization, we may
919  // clone parents doFinalization and customize here.
920  // Currently, we if NVISA out the EmitGlobals() in
921  // parent's doFinalization, which is too intrusive.
922  //
923  // Same for the doInitialization.
924  //return Result;
925 }
926 
927 // This function emits appropriate linkage directives for
928 // functions and global variables.
929 //
930 // extern function declaration -> .extern
931 // extern function definition -> .visible
932 // external global variable with init -> .visible
933 // external without init -> .extern
934 // appending -> not allowed, assert.
935 // for any linkage other than
936 // internal, private, linker_private,
937 // linker_private_weak, linker_private_weak_def_auto,
938 // we emit -> .weak.
939 
940 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
941  raw_ostream &O) {
942  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
943  if (V->hasExternalLinkage()) {
944  if (isa<GlobalVariable>(V)) {
945  const GlobalVariable *GVar = cast<GlobalVariable>(V);
946  if (GVar) {
947  if (GVar->hasInitializer())
948  O << ".visible ";
949  else
950  O << ".extern ";
951  }
952  } else if (V->isDeclaration())
953  O << ".extern ";
954  else
955  O << ".visible ";
956  } else if (V->hasAppendingLinkage()) {
957  std::string msg;
958  msg.append("Error: ");
959  msg.append("Symbol ");
960  if (V->hasName())
961  msg.append(std::string(V->getName()));
962  msg.append("has unsupported appending linkage type");
963  llvm_unreachable(msg.c_str());
964  } else if (!V->hasInternalLinkage() &&
965  !V->hasPrivateLinkage()) {
966  O << ".weak ";
967  }
968  }
969 }
970 
971 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
972  raw_ostream &O, bool processDemoted,
973  const NVPTXSubtarget &STI) {
974  // Skip meta data
975  if (GVar->hasSection()) {
976  if (GVar->getSection() == "llvm.metadata")
977  return;
978  }
979 
980  // Skip LLVM intrinsic global variables
981  if (GVar->getName().startswith("llvm.") ||
982  GVar->getName().startswith("nvvm."))
983  return;
984 
985  const DataLayout &DL = getDataLayout();
986 
987  // GlobalVariables are always constant pointers themselves.
988  PointerType *PTy = GVar->getType();
989  Type *ETy = GVar->getValueType();
990 
991  if (GVar->hasExternalLinkage()) {
992  if (GVar->hasInitializer())
993  O << ".visible ";
994  else
995  O << ".extern ";
996  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
998  GVar->hasCommonLinkage()) {
999  O << ".weak ";
1000  }
1001 
1002  if (isTexture(*GVar)) {
1003  O << ".global .texref " << getTextureName(*GVar) << ";\n";
1004  return;
1005  }
1006 
1007  if (isSurface(*GVar)) {
1008  O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1009  return;
1010  }
1011 
1012  if (GVar->isDeclaration()) {
1013  // (extern) declarations, no definition or initializer
1014  // Currently the only known declaration is for an automatic __local
1015  // (.shared) promoted to global.
1016  emitPTXGlobalVariable(GVar, O, STI);
1017  O << ";\n";
1018  return;
1019  }
1020 
1021  if (isSampler(*GVar)) {
1022  O << ".global .samplerref " << getSamplerName(*GVar);
1023 
1024  const Constant *Initializer = nullptr;
1025  if (GVar->hasInitializer())
1026  Initializer = GVar->getInitializer();
1027  const ConstantInt *CI = nullptr;
1028  if (Initializer)
1029  CI = dyn_cast<ConstantInt>(Initializer);
1030  if (CI) {
1031  unsigned sample = CI->getZExtValue();
1032 
1033  O << " = { ";
1034 
1035  for (int i = 0,
1036  addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1037  i < 3; i++) {
1038  O << "addr_mode_" << i << " = ";
1039  switch (addr) {
1040  case 0:
1041  O << "wrap";
1042  break;
1043  case 1:
1044  O << "clamp_to_border";
1045  break;
1046  case 2:
1047  O << "clamp_to_edge";
1048  break;
1049  case 3:
1050  O << "wrap";
1051  break;
1052  case 4:
1053  O << "mirror";
1054  break;
1055  }
1056  O << ", ";
1057  }
1058  O << "filter_mode = ";
1059  switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1060  case 0:
1061  O << "nearest";
1062  break;
1063  case 1:
1064  O << "linear";
1065  break;
1066  case 2:
1067  llvm_unreachable("Anisotropic filtering is not supported");
1068  default:
1069  O << "nearest";
1070  break;
1071  }
1072  if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1073  O << ", force_unnormalized_coords = 1";
1074  }
1075  O << " }";
1076  }
1077 
1078  O << ";\n";
1079  return;
1080  }
1081 
1082  if (GVar->hasPrivateLinkage()) {
1083  if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1084  return;
1085 
1086  // FIXME - need better way (e.g. Metadata) to avoid generating this global
1087  if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1088  return;
1089  if (GVar->use_empty())
1090  return;
1091  }
1092 
1093  const Function *demotedFunc = nullptr;
1094  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1095  O << "// " << GVar->getName() << " has been demoted\n";
1096  if (localDecls.find(demotedFunc) != localDecls.end())
1097  localDecls[demotedFunc].push_back(GVar);
1098  else {
1099  std::vector<const GlobalVariable *> temp;
1100  temp.push_back(GVar);
1101  localDecls[demotedFunc] = temp;
1102  }
1103  return;
1104  }
1105 
1106  O << ".";
1107  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1108 
1109  if (isManaged(*GVar)) {
1110  if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1112  ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1113  }
1114  O << " .attribute(.managed)";
1115  }
1116 
1117  if (MaybeAlign A = GVar->getAlign())
1118  O << " .align " << A->value();
1119  else
1120  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1121 
1122  if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1123  (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1124  O << " .";
1125  // Special case: ABI requires that we use .u8 for predicates
1126  if (ETy->isIntegerTy(1))
1127  O << "u8";
1128  else
1129  O << getPTXFundamentalTypeStr(ETy, false);
1130  O << " ";
1131  getSymbol(GVar)->print(O, MAI);
1132 
1133  // Ptx allows variable initilization only for constant and global state
1134  // spaces.
1135  if (GVar->hasInitializer()) {
1136  if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1137  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1138  const Constant *Initializer = GVar->getInitializer();
1139  // 'undef' is treated as there is no value specified.
1140  if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1141  O << " = ";
1142  printScalarConstant(Initializer, O);
1143  }
1144  } else {
1145  // The frontend adds zero-initializer to device and constant variables
1146  // that don't have an initial value, and UndefValue to shared
1147  // variables, so skip warning for this case.
1148  if (!GVar->getInitializer()->isNullValue() &&
1149  !isa<UndefValue>(GVar->getInitializer())) {
1150  report_fatal_error("initial value of '" + GVar->getName() +
1151  "' is not allowed in addrspace(" +
1152  Twine(PTy->getAddressSpace()) + ")");
1153  }
1154  }
1155  }
1156  } else {
1157  unsigned int ElementSize = 0;
1158 
1159  // Although PTX has direct support for struct type and array type and
1160  // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1161  // targets that support these high level field accesses. Structs, arrays
1162  // and vectors are lowered into arrays of bytes.
1163  switch (ETy->getTypeID()) {
1164  case Type::IntegerTyID: // Integers larger than 64 bits
1165  case Type::StructTyID:
1166  case Type::ArrayTyID:
1167  case Type::FixedVectorTyID:
1168  ElementSize = DL.getTypeStoreSize(ETy);
1169  // Ptx allows variable initilization only for constant and
1170  // global state spaces.
1171  if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1172  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1173  GVar->hasInitializer()) {
1174  const Constant *Initializer = GVar->getInitializer();
1175  if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1176  AggBuffer aggBuffer(ElementSize, *this);
1177  bufferAggregateConstant(Initializer, &aggBuffer);
1178  if (aggBuffer.numSymbols()) {
1179  unsigned int ptrSize = MAI->getCodePointerSize();
1180  if (ElementSize % ptrSize ||
1181  !aggBuffer.allSymbolsAligned(ptrSize)) {
1182  // Print in bytes and use the mask() operator for pointers.
1183  if (!STI.hasMaskOperator())
1185  "initialized packed aggregate with pointers '" +
1186  GVar->getName() +
1187  "' requires at least PTX ISA version 7.1");
1188  O << " .u8 ";
1189  getSymbol(GVar)->print(O, MAI);
1190  O << "[" << ElementSize << "] = {";
1191  aggBuffer.printBytes(O);
1192  O << "}";
1193  } else {
1194  O << " .u" << ptrSize * 8 << " ";
1195  getSymbol(GVar)->print(O, MAI);
1196  O << "[" << ElementSize / ptrSize << "] = {";
1197  aggBuffer.printWords(O);
1198  O << "}";
1199  }
1200  } else {
1201  O << " .b8 ";
1202  getSymbol(GVar)->print(O, MAI);
1203  O << "[" << ElementSize << "] = {";
1204  aggBuffer.printBytes(O);
1205  O << "}";
1206  }
1207  } else {
1208  O << " .b8 ";
1209  getSymbol(GVar)->print(O, MAI);
1210  if (ElementSize) {
1211  O << "[";
1212  O << ElementSize;
1213  O << "]";
1214  }
1215  }
1216  } else {
1217  O << " .b8 ";
1218  getSymbol(GVar)->print(O, MAI);
1219  if (ElementSize) {
1220  O << "[";
1221  O << ElementSize;
1222  O << "]";
1223  }
1224  }
1225  break;
1226  default:
1227  llvm_unreachable("type not supported yet");
1228  }
1229  }
1230  O << ";\n";
1231 }
1232 
1233 void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1234  const Value *v = Symbols[nSym];
1235  const Value *v0 = SymbolsBeforeStripping[nSym];
1236  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1237  MCSymbol *Name = AP.getSymbol(GVar);
1238  PointerType *PTy = dyn_cast<PointerType>(v0->getType());
1239  // Is v0 a generic pointer?
1240  bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1241  if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1242  os << "generic(";
1243  Name->print(os, AP.MAI);
1244  os << ")";
1245  } else {
1246  Name->print(os, AP.MAI);
1247  }
1248  } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1249  const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
1250  AP.printMCExpr(*Expr, os);
1251  } else
1252  llvm_unreachable("symbol type unknown");
1253 }
1254 
1255 void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1256  unsigned int ptrSize = AP.MAI->getCodePointerSize();
1257  symbolPosInBuffer.push_back(size);
1258  unsigned int nSym = 0;
1259  unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1260  for (unsigned int pos = 0; pos < size;) {
1261  if (pos)
1262  os << ", ";
1263  if (pos != nextSymbolPos) {
1264  os << (unsigned int)buffer[pos];
1265  ++pos;
1266  continue;
1267  }
1268  // Generate a per-byte mask() operator for the symbol, which looks like:
1269  // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1270  // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1271  std::string symText;
1272  llvm::raw_string_ostream oss(symText);
1273  printSymbol(nSym, oss);
1274  for (unsigned i = 0; i < ptrSize; ++i) {
1275  if (i)
1276  os << ", ";
1277  llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1278  os << "(" << symText << ")";
1279  }
1280  pos += ptrSize;
1281  nextSymbolPos = symbolPosInBuffer[++nSym];
1282  assert(nextSymbolPos >= pos);
1283  }
1284 }
1285 
1286 void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1287  unsigned int ptrSize = AP.MAI->getCodePointerSize();
1288  symbolPosInBuffer.push_back(size);
1289  unsigned int nSym = 0;
1290  unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1291  assert(nextSymbolPos % ptrSize == 0);
1292  for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1293  if (pos)
1294  os << ", ";
1295  if (pos == nextSymbolPos) {
1296  printSymbol(nSym, os);
1297  nextSymbolPos = symbolPosInBuffer[++nSym];
1298  assert(nextSymbolPos % ptrSize == 0);
1299  assert(nextSymbolPos >= pos + ptrSize);
1300  } else if (ptrSize == 4)
1301  os << support::endian::read32le(&buffer[pos]);
1302  else
1303  os << support::endian::read64le(&buffer[pos]);
1304  }
1305 }
1306 
1307 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1308  if (localDecls.find(f) == localDecls.end())
1309  return;
1310 
1311  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1312 
1313  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1314  const NVPTXSubtarget &STI =
1315  *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1316 
1317  for (const GlobalVariable *GV : gvars) {
1318  O << "\t// demoted variable\n\t";
1319  printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1320  }
1321 }
1322 
1323 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1324  raw_ostream &O) const {
1325  switch (AddressSpace) {
1326  case ADDRESS_SPACE_LOCAL:
1327  O << "local";
1328  break;
1329  case ADDRESS_SPACE_GLOBAL:
1330  O << "global";
1331  break;
1332  case ADDRESS_SPACE_CONST:
1333  O << "const";
1334  break;
1335  case ADDRESS_SPACE_SHARED:
1336  O << "shared";
1337  break;
1338  default:
1339  report_fatal_error("Bad address space found while emitting PTX: " +
1341  break;
1342  }
1343 }
1344 
1345 std::string
1346 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1347  switch (Ty->getTypeID()) {
1348  case Type::IntegerTyID: {
1349  unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1350  if (NumBits == 1)
1351  return "pred";
1352  else if (NumBits <= 64) {
1353  std::string name = "u";
1354  return name + utostr(NumBits);
1355  } else {
1356  llvm_unreachable("Integer too large");
1357  break;
1358  }
1359  break;
1360  }
1361  case Type::HalfTyID:
1362  // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1363  return "b16";
1364  case Type::FloatTyID:
1365  return "f32";
1366  case Type::DoubleTyID:
1367  return "f64";
1368  case Type::PointerTyID: {
1369  unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1370  assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1371 
1372  if (PtrSize == 64)
1373  if (useB4PTR)
1374  return "b64";
1375  else
1376  return "u64";
1377  else if (useB4PTR)
1378  return "b32";
1379  else
1380  return "u32";
1381  }
1382  default:
1383  break;
1384  }
1385  llvm_unreachable("unexpected type");
1386 }
1387 
1388 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1389  raw_ostream &O,
1390  const NVPTXSubtarget &STI) {
1391  const DataLayout &DL = getDataLayout();
1392 
1393  // GlobalVariables are always constant pointers themselves.
1394  Type *ETy = GVar->getValueType();
1395 
1396  O << ".";
1397  emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1398  if (isManaged(*GVar)) {
1399  if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1401  ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1402  }
1403  O << " .attribute(.managed)";
1404  }
1405  if (MaybeAlign A = GVar->getAlign())
1406  O << " .align " << A->value();
1407  else
1408  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1409 
1410  // Special case for i128
1411  if (ETy->isIntegerTy(128)) {
1412  O << " .b8 ";
1413  getSymbol(GVar)->print(O, MAI);
1414  O << "[16]";
1415  return;
1416  }
1417 
1418  if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1419  O << " .";
1420  O << getPTXFundamentalTypeStr(ETy);
1421  O << " ";
1422  getSymbol(GVar)->print(O, MAI);
1423  return;
1424  }
1425 
1426  int64_t ElementSize = 0;
1427 
1428  // Although PTX has direct support for struct type and array type and LLVM IR
1429  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1430  // support these high level field accesses. Structs and arrays are lowered
1431  // into arrays of bytes.
1432  switch (ETy->getTypeID()) {
1433  case Type::StructTyID:
1434  case Type::ArrayTyID:
1435  case Type::FixedVectorTyID:
1436  ElementSize = DL.getTypeStoreSize(ETy);
1437  O << " .b8 ";
1438  getSymbol(GVar)->print(O, MAI);
1439  O << "[";
1440  if (ElementSize) {
1441  O << ElementSize;
1442  }
1443  O << "]";
1444  break;
1445  default:
1446  llvm_unreachable("type not supported yet");
1447  }
1448 }
1449 
1450 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1451  int paramIndex, raw_ostream &O) {
1452  getSymbol(I->getParent())->print(O, MAI);
1453  O << "_param_" << paramIndex;
1454 }
1455 
1456 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1457  const DataLayout &DL = getDataLayout();
1458  const AttributeList &PAL = F->getAttributes();
1459  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1460  const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1461 
1463  unsigned paramIndex = 0;
1464  bool first = true;
1465  bool isKernelFunc = isKernelFunction(*F);
1466  bool isABI = (STI.getSmVersion() >= 20);
1467  bool hasImageHandles = STI.hasImageHandles();
1468 
1469  if (F->arg_empty()) {
1470  O << "()\n";
1471  return;
1472  }
1473 
1474  O << "(\n";
1475 
1476  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1477  Type *Ty = I->getType();
1478 
1479  if (!first)
1480  O << ",\n";
1481 
1482  first = false;
1483 
1484  // Handle image/sampler parameters
1485  if (isKernelFunction(*F)) {
1486  if (isSampler(*I) || isImage(*I)) {
1487  if (isImage(*I)) {
1488  std::string sname = std::string(I->getName());
1489  if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1490  if (hasImageHandles)
1491  O << "\t.param .u64 .ptr .surfref ";
1492  else
1493  O << "\t.param .surfref ";
1494  CurrentFnSym->print(O, MAI);
1495  O << "_param_" << paramIndex;
1496  }
1497  else { // Default image is read_only
1498  if (hasImageHandles)
1499  O << "\t.param .u64 .ptr .texref ";
1500  else
1501  O << "\t.param .texref ";
1502  CurrentFnSym->print(O, MAI);
1503  O << "_param_" << paramIndex;
1504  }
1505  } else {
1506  if (hasImageHandles)
1507  O << "\t.param .u64 .ptr .samplerref ";
1508  else
1509  O << "\t.param .samplerref ";
1510  CurrentFnSym->print(O, MAI);
1511  O << "_param_" << paramIndex;
1512  }
1513  continue;
1514  }
1515  }
1516 
1517  auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
1518  paramIndex](Type *Ty) -> Align {
1519  Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1520  MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
1521  return std::max(TypeAlign, ParamAlign.valueOrOne());
1522  };
1523 
1524  if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1525  if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1526  // Just print .param .align <a> .b8 .param[size];
1527  // <a> = optimal alignment for the element type; always multiple of
1528  // PAL.getParamAlignment
1529  // size = typeallocsize of element type
1530  Align OptimalAlign = getOptimalAlignForParam(Ty);
1531 
1532  O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1533  printParamName(I, paramIndex, O);
1534  O << "[" << DL.getTypeAllocSize(Ty) << "]";
1535 
1536  continue;
1537  }
1538  // Just a scalar
1539  auto *PTy = dyn_cast<PointerType>(Ty);
1540  unsigned PTySizeInBits = 0;
1541  if (PTy) {
1542  PTySizeInBits =
1543  TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1544  assert(PTySizeInBits && "Invalid pointer size");
1545  }
1546 
1547  if (isKernelFunc) {
1548  if (PTy) {
1549  // Special handling for pointer arguments to kernel
1550  O << "\t.param .u" << PTySizeInBits << " ";
1551 
1552  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1553  NVPTX::CUDA) {
1554  int addrSpace = PTy->getAddressSpace();
1555  switch (addrSpace) {
1556  default:
1557  O << ".ptr ";
1558  break;
1559  case ADDRESS_SPACE_CONST:
1560  O << ".ptr .const ";
1561  break;
1562  case ADDRESS_SPACE_SHARED:
1563  O << ".ptr .shared ";
1564  break;
1565  case ADDRESS_SPACE_GLOBAL:
1566  O << ".ptr .global ";
1567  break;
1568  }
1569  Align ParamAlign = I->getParamAlign().valueOrOne();
1570  O << ".align " << ParamAlign.value() << " ";
1571  }
1572  printParamName(I, paramIndex, O);
1573  continue;
1574  }
1575 
1576  // non-pointer scalar to kernel func
1577  O << "\t.param .";
1578  // Special case: predicate operands become .u8 types
1579  if (Ty->isIntegerTy(1))
1580  O << "u8";
1581  else
1582  O << getPTXFundamentalTypeStr(Ty);
1583  O << " ";
1584  printParamName(I, paramIndex, O);
1585  continue;
1586  }
1587  // Non-kernel function, just print .param .b<size> for ABI
1588  // and .reg .b<size> for non-ABI
1589  unsigned sz = 0;
1590  if (isa<IntegerType>(Ty)) {
1591  sz = cast<IntegerType>(Ty)->getBitWidth();
1592  sz = promoteScalarArgumentSize(sz);
1593  } else if (PTy) {
1594  assert(PTySizeInBits && "Invalid pointer size");
1595  sz = PTySizeInBits;
1596  } else if (Ty->isHalfTy())
1597  // PTX ABI requires all scalar parameters to be at least 32
1598  // bits in size. fp16 normally uses .b16 as its storage type
1599  // in PTX, so its size must be adjusted here, too.
1600  sz = 32;
1601  else
1602  sz = Ty->getPrimitiveSizeInBits();
1603  if (isABI)
1604  O << "\t.param .b" << sz << " ";
1605  else
1606  O << "\t.reg .b" << sz << " ";
1607  printParamName(I, paramIndex, O);
1608  continue;
1609  }
1610 
1611  // param has byVal attribute.
1612  Type *ETy = PAL.getParamByValType(paramIndex);
1613  assert(ETy && "Param should have byval type");
1614 
1615  if (isABI || isKernelFunc) {
1616  // Just print .param .align <a> .b8 .param[size];
1617  // <a> = optimal alignment for the element type; always multiple of
1618  // PAL.getParamAlignment
1619  // size = typeallocsize of element type
1620  Align OptimalAlign = getOptimalAlignForParam(ETy);
1621 
1622  // Work around a bug in ptxas. When PTX code takes address of
1623  // byval parameter with alignment < 4, ptxas generates code to
1624  // spill argument into memory. Alas on sm_50+ ptxas generates
1625  // SASS code that fails with misaligned access. To work around
1626  // the problem, make sure that we align byval parameters by at
1627  // least 4. Matching change must be made in LowerCall() where we
1628  // prepare parameters for the call.
1629  //
1630  // TODO: this will need to be undone when we get to support multi-TU
1631  // device-side compilation as it breaks ABI compatibility with nvcc.
1632  // Hopefully ptxas bug is fixed by then.
1633  if (!isKernelFunc && OptimalAlign < Align(4))
1634  OptimalAlign = Align(4);
1635  unsigned sz = DL.getTypeAllocSize(ETy);
1636  O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1637  printParamName(I, paramIndex, O);
1638  O << "[" << sz << "]";
1639  continue;
1640  } else {
1641  // Split the ETy into constituent parts and
1642  // print .param .b<size> <name> for each part.
1643  // Further, if a part is vector, print the above for
1644  // each vector element.
1645  SmallVector<EVT, 16> vtparts;
1646  ComputeValueVTs(*TLI, DL, ETy, vtparts);
1647  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1648  unsigned elems = 1;
1649  EVT elemtype = vtparts[i];
1650  if (vtparts[i].isVector()) {
1651  elems = vtparts[i].getVectorNumElements();
1652  elemtype = vtparts[i].getVectorElementType();
1653  }
1654 
1655  for (unsigned j = 0, je = elems; j != je; ++j) {
1656  unsigned sz = elemtype.getSizeInBits();
1657  if (elemtype.isInteger())
1658  sz = promoteScalarArgumentSize(sz);
1659  O << "\t.reg .b" << sz << " ";
1660  printParamName(I, paramIndex, O);
1661  if (j < je - 1)
1662  O << ",\n";
1663  ++paramIndex;
1664  }
1665  if (i < e - 1)
1666  O << ",\n";
1667  }
1668  --paramIndex;
1669  continue;
1670  }
1671  }
1672 
1673  O << "\n)\n";
1674 }
1675 
1676 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1677  raw_ostream &O) {
1678  const Function &F = MF.getFunction();
1679  emitFunctionParamList(&F, O);
1680 }
1681 
1682 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1683  const MachineFunction &MF) {
1684  SmallString<128> Str;
1685  raw_svector_ostream O(Str);
1686 
1687  // Map the global virtual register number to a register class specific
1688  // virtual register number starting from 1 with that class.
1690  //unsigned numRegClasses = TRI->getNumRegClasses();
1691 
1692  // Emit the Fake Stack Object
1693  const MachineFrameInfo &MFI = MF.getFrameInfo();
1694  int NumBytes = (int) MFI.getStackSize();
1695  if (NumBytes) {
1696  O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1697  << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1698  if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1699  O << "\t.reg .b64 \t%SP;\n";
1700  O << "\t.reg .b64 \t%SPL;\n";
1701  } else {
1702  O << "\t.reg .b32 \t%SP;\n";
1703  O << "\t.reg .b32 \t%SPL;\n";
1704  }
1705  }
1706 
1707  // Go through all virtual registers to establish the mapping between the
1708  // global virtual
1709  // register number and the per class virtual register number.
1710  // We use the per class virtual register number in the ptx output.
1711  unsigned int numVRs = MRI->getNumVirtRegs();
1712  for (unsigned i = 0; i < numVRs; i++) {
1714  const TargetRegisterClass *RC = MRI->getRegClass(vr);
1715  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1716  int n = regmap.size();
1717  regmap.insert(std::make_pair(vr, n + 1));
1718  }
1719 
1720  // Emit register declarations
1721  // @TODO: Extract out the real register usage
1722  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1723  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1724  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1725  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1726  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1727  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1728  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1729 
1730  // Emit declaration of the virtual registers or 'physical' registers for
1731  // each register class
1732  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1733  const TargetRegisterClass *RC = TRI->getRegClass(i);
1734  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1735  std::string rcname = getNVPTXRegClassName(RC);
1736  std::string rcStr = getNVPTXRegClassStr(RC);
1737  int n = regmap.size();
1738 
1739  // Only declare those registers that may be used.
1740  if (n) {
1741  O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1742  << ">;\n";
1743  }
1744  }
1745 
1746  OutStreamer->emitRawText(O.str());
1747 }
1748 
1749 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1750  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1751  bool ignored;
1752  unsigned int numHex;
1753  const char *lead;
1754 
1755  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1756  numHex = 8;
1757  lead = "0f";
1759  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1760  numHex = 16;
1761  lead = "0d";
1763  } else
1764  llvm_unreachable("unsupported fp type");
1765 
1766  APInt API = APF.bitcastToAPInt();
1767  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1768 }
1769 
1770 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1771  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1772  O << CI->getValue();
1773  return;
1774  }
1775  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1776  printFPConstant(CFP, O);
1777  return;
1778  }
1779  if (isa<ConstantPointerNull>(CPV)) {
1780  O << "0";
1781  return;
1782  }
1783  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1784  bool IsNonGenericPointer = false;
1785  if (GVar->getType()->getAddressSpace() != 0) {
1786  IsNonGenericPointer = true;
1787  }
1788  if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1789  O << "generic(";
1790  getSymbol(GVar)->print(O, MAI);
1791  O << ")";
1792  } else {
1793  getSymbol(GVar)->print(O, MAI);
1794  }
1795  return;
1796  }
1797  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1798  const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1799  printMCExpr(*E, O);
1800  return;
1801  }
1802  llvm_unreachable("Not scalar type found in printScalarConstant()");
1803 }
1804 
1805 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1806  AggBuffer *AggBuffer) {
1807  const DataLayout &DL = getDataLayout();
1808  int AllocSize = DL.getTypeAllocSize(CPV->getType());
1809  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1810  // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1811  // only the space allocated by CPV.
1812  AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1813  return;
1814  }
1815 
1816  // Helper for filling AggBuffer with APInts.
1817  auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1818  size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1819  SmallVector<unsigned char, 16> Buf(NumBytes);
1820  for (unsigned I = 0; I < NumBytes; ++I) {
1821  Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1822  }
1823  AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1824  };
1825 
1826  switch (CPV->getType()->getTypeID()) {
1827  case Type::IntegerTyID:
1828  if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1829  AddIntToBuffer(CI->getValue());
1830  break;
1831  }
1832  if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1833  if (const auto *CI =
1834  dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1835  AddIntToBuffer(CI->getValue());
1836  break;
1837  }
1838  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1839  Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1840  AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1841  AggBuffer->addZeros(AllocSize);
1842  break;
1843  }
1844  }
1845  llvm_unreachable("unsupported integer const type");
1846  break;
1847 
1848  case Type::HalfTyID:
1849  case Type::BFloatTyID:
1850  case Type::FloatTyID:
1851  case Type::DoubleTyID:
1852  AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1853  break;
1854 
1855  case Type::PointerTyID: {
1856  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1857  AggBuffer->addSymbol(GVar, GVar);
1858  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1859  const Value *v = Cexpr->stripPointerCasts();
1860  AggBuffer->addSymbol(v, Cexpr);
1861  }
1862  AggBuffer->addZeros(AllocSize);
1863  break;
1864  }
1865 
1866  case Type::ArrayTyID:
1867  case Type::FixedVectorTyID:
1868  case Type::StructTyID: {
1869  if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1870  bufferAggregateConstant(CPV, AggBuffer);
1871  if (Bytes > AllocSize)
1872  AggBuffer->addZeros(Bytes - AllocSize);
1873  } else if (isa<ConstantAggregateZero>(CPV))
1874  AggBuffer->addZeros(Bytes);
1875  else
1876  llvm_unreachable("Unexpected Constant type");
1877  break;
1878  }
1879 
1880  default:
1881  llvm_unreachable("unsupported type");
1882  }
1883 }
1884 
1885 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1886  AggBuffer *aggBuffer) {
1887  const DataLayout &DL = getDataLayout();
1888  int Bytes;
1889 
1890  // Integers of arbitrary width
1891  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1892  APInt Val = CI->getValue();
1893  for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1894  uint8_t Byte = Val.getLoBits(8).getZExtValue();
1895  aggBuffer->addBytes(&Byte, 1, 1);
1896  Val.lshrInPlace(8);
1897  }
1898  return;
1899  }
1900 
1901  // Old constants
1902  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1903  if (CPV->getNumOperands())
1904  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1905  bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1906  return;
1907  }
1908 
1909  if (const ConstantDataSequential *CDS =
1910  dyn_cast<ConstantDataSequential>(CPV)) {
1911  if (CDS->getNumElements())
1912  for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1913  bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1914  aggBuffer);
1915  return;
1916  }
1917 
1918  if (isa<ConstantStruct>(CPV)) {
1919  if (CPV->getNumOperands()) {
1920  StructType *ST = cast<StructType>(CPV->getType());
1921  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1922  if (i == (e - 1))
1923  Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1924  DL.getTypeAllocSize(ST) -
1925  DL.getStructLayout(ST)->getElementOffset(i);
1926  else
1927  Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1928  DL.getStructLayout(ST)->getElementOffset(i);
1929  bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1930  }
1931  }
1932  return;
1933  }
1934  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1935 }
1936 
1937 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1938 /// a copy from AsmPrinter::lowerConstant, except customized to only handle
1939 /// expressions that are representable in PTX and create
1940 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1941 const MCExpr *
1942 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1943  MCContext &Ctx = OutContext;
1944 
1945  if (CV->isNullValue() || isa<UndefValue>(CV))
1946  return MCConstantExpr::create(0, Ctx);
1947 
1948  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1949  return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1950 
1951  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1952  const MCSymbolRefExpr *Expr =
1954  if (ProcessingGeneric) {
1955  return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1956  } else {
1957  return Expr;
1958  }
1959  }
1960 
1961  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1962  if (!CE) {
1963  llvm_unreachable("Unknown constant value to lower!");
1964  }
1965 
1966  switch (CE->getOpcode()) {
1967  default: {
1968  // If the code isn't optimized, there may be outstanding folding
1969  // opportunities. Attempt to fold the expression using DataLayout as a
1970  // last resort before giving up.
1972  if (C != CE)
1973  return lowerConstantForGV(C, ProcessingGeneric);
1974 
1975  // Otherwise report the problem to the user.
1976  std::string S;
1977  raw_string_ostream OS(S);
1978  OS << "Unsupported expression in static initializer: ";
1979  CE->printAsOperand(OS, /*PrintType=*/false,
1980  !MF ? nullptr : MF->getFunction().getParent());
1981  report_fatal_error(Twine(OS.str()));
1982  }
1983 
1984  case Instruction::AddrSpaceCast: {
1985  // Strip the addrspacecast and pass along the operand
1986  PointerType *DstTy = cast<PointerType>(CE->getType());
1987  if (DstTy->getAddressSpace() == 0) {
1988  return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1989  }
1990  std::string S;
1991  raw_string_ostream OS(S);
1992  OS << "Unsupported expression in static initializer: ";
1993  CE->printAsOperand(OS, /*PrintType=*/ false,
1994  !MF ? nullptr : MF->getFunction().getParent());
1995  report_fatal_error(Twine(OS.str()));
1996  }
1997 
1998  case Instruction::GetElementPtr: {
1999  const DataLayout &DL = getDataLayout();
2000 
2001  // Generate a symbolic expression for the byte address
2002  APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2003  cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2004 
2005  const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2006  ProcessingGeneric);
2007  if (!OffsetAI)
2008  return Base;
2009 
2010  int64_t Offset = OffsetAI.getSExtValue();
2012  Ctx);
2013  }
2014 
2015  case Instruction::Trunc:
2016  // We emit the value and depend on the assembler to truncate the generated
2017  // expression properly. This is important for differences between
2018  // blockaddress labels. Since the two labels are in the same function, it
2019  // is reasonable to treat their delta as a 32-bit value.
2020  [[fallthrough]];
2021  case Instruction::BitCast:
2022  return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2023 
2024  case Instruction::IntToPtr: {
2025  const DataLayout &DL = getDataLayout();
2026 
2027  // Handle casts to pointers by changing them into casts to the appropriate
2028  // integer type. This promotes constant folding and simplifies this code.
2029  Constant *Op = CE->getOperand(0);
2030  Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2031  false/*ZExt*/);
2032  return lowerConstantForGV(Op, ProcessingGeneric);
2033  }
2034 
2035  case Instruction::PtrToInt: {
2036  const DataLayout &DL = getDataLayout();
2037 
2038  // Support only foldable casts to/from pointers that can be eliminated by
2039  // changing the pointer to the appropriately sized integer type.
2040  Constant *Op = CE->getOperand(0);
2041  Type *Ty = CE->getType();
2042 
2043  const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2044 
2045  // We can emit the pointer value into this slot if the slot is an
2046  // integer slot equal to the size of the pointer.
2047  if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2048  return OpExpr;
2049 
2050  // Otherwise the pointer is smaller than the resultant integer, mask off
2051  // the high bits so we are sure to get a proper truncation if the input is
2052  // a constant expr.
2053  unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2054  const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2055  return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2056  }
2057 
2058  // The MC library also has a right-shift operator, but it isn't consistently
2059  // signed or unsigned between different targets.
2060  case Instruction::Add: {
2061  const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2062  const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2063  switch (CE->getOpcode()) {
2064  default: llvm_unreachable("Unknown binary operator constant cast expr");
2065  case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2066  }
2067  }
2068  }
2069 }
2070 
2071 // Copy of MCExpr::print customized for NVPTX
2072 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2073  switch (Expr.getKind()) {
2074  case MCExpr::Target:
2075  return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2076  case MCExpr::Constant:
2077  OS << cast<MCConstantExpr>(Expr).getValue();
2078  return;
2079 
2080  case MCExpr::SymbolRef: {
2081  const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2082  const MCSymbol &Sym = SRE.getSymbol();
2083  Sym.print(OS, MAI);
2084  return;
2085  }
2086 
2087  case MCExpr::Unary: {
2088  const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2089  switch (UE.getOpcode()) {
2090  case MCUnaryExpr::LNot: OS << '!'; break;
2091  case MCUnaryExpr::Minus: OS << '-'; break;
2092  case MCUnaryExpr::Not: OS << '~'; break;
2093  case MCUnaryExpr::Plus: OS << '+'; break;
2094  }
2095  printMCExpr(*UE.getSubExpr(), OS);
2096  return;
2097  }
2098 
2099  case MCExpr::Binary: {
2100  const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2101 
2102  // Only print parens around the LHS if it is non-trivial.
2103  if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2104  isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2105  printMCExpr(*BE.getLHS(), OS);
2106  } else {
2107  OS << '(';
2108  printMCExpr(*BE.getLHS(), OS);
2109  OS<< ')';
2110  }
2111 
2112  switch (BE.getOpcode()) {
2113  case MCBinaryExpr::Add:
2114  // Print "X-42" instead of "X+-42".
2115  if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2116  if (RHSC->getValue() < 0) {
2117  OS << RHSC->getValue();
2118  return;
2119  }
2120  }
2121 
2122  OS << '+';
2123  break;
2124  default: llvm_unreachable("Unhandled binary operator");
2125  }
2126 
2127  // Only print parens around the LHS if it is non-trivial.
2128  if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2129  printMCExpr(*BE.getRHS(), OS);
2130  } else {
2131  OS << '(';
2132  printMCExpr(*BE.getRHS(), OS);
2133  OS << ')';
2134  }
2135  return;
2136  }
2137  }
2138 
2139  llvm_unreachable("Invalid expression kind!");
2140 }
2141 
2142 /// PrintAsmOperand - Print out an operand for an inline asm expression.
2143 ///
2144 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2145  const char *ExtraCode, raw_ostream &O) {
2146  if (ExtraCode && ExtraCode[0]) {
2147  if (ExtraCode[1] != 0)
2148  return true; // Unknown modifier.
2149 
2150  switch (ExtraCode[0]) {
2151  default:
2152  // See if this is a generic print operand
2153  return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2154  case 'r':
2155  break;
2156  }
2157  }
2158 
2159  printOperand(MI, OpNo, O);
2160 
2161  return false;
2162 }
2163 
2164 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2165  unsigned OpNo,
2166  const char *ExtraCode,
2167  raw_ostream &O) {
2168  if (ExtraCode && ExtraCode[0])
2169  return true; // Unknown modifier
2170 
2171  O << '[';
2172  printMemOperand(MI, OpNo, O);
2173  O << ']';
2174 
2175  return false;
2176 }
2177 
2178 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2179  raw_ostream &O) {
2180  const MachineOperand &MO = MI->getOperand(opNum);
2181  switch (MO.getType()) {
2184  if (MO.getReg() == NVPTX::VRDepot)
2185  O << DEPOTNAME << getFunctionNumber();
2186  else
2188  } else {
2189  emitVirtualRegister(MO.getReg(), O);
2190  }
2191  break;
2192 
2194  O << MO.getImm();
2195  break;
2196 
2198  printFPConstant(MO.getFPImm(), O);
2199  break;
2200 
2202  PrintSymbolOperand(MO, O);
2203  break;
2204 
2206  MO.getMBB()->getSymbol()->print(O, MAI);
2207  break;
2208 
2209  default:
2210  llvm_unreachable("Operand type not supported.");
2211  }
2212 }
2213 
2214 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2215  raw_ostream &O, const char *Modifier) {
2216  printOperand(MI, opNum, O);
2217 
2218  if (Modifier && strcmp(Modifier, "add") == 0) {
2219  O << ", ";
2220  printOperand(MI, opNum + 1, O);
2221  } else {
2222  if (MI->getOperand(opNum + 1).isImm() &&
2223  MI->getOperand(opNum + 1).getImm() == 0)
2224  return; // don't print ',0' or '+0'
2225  O << "+";
2226  printOperand(MI, opNum + 1, O);
2227  }
2228 }
2229 
2230 // Force static initialization.
2234 }
llvm::NVPTXAsmPrinter::AggBuffer
friend class AggBuffer
Definition: NVPTXAsmPrinter.h:151
llvm::codeview::SimpleTypeKind::Byte
@ Byte
i
i
Definition: README.txt:29
llvm::NVPTXAsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: NVPTXAsmPrinter.cpp:888
NVPTXInstPrinter.h
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:459
llvm::Type::ArrayTyID
@ ArrayTyID
Arrays.
Definition: Type.h:75
ValueTypes.h
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::Type::FloatTyID
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
llvm::TargetMachine::getPointerSizeInBits
unsigned getPointerSizeInBits(unsigned AS) const
Definition: TargetMachine.h:186
llvm::getReqNTIDx
bool getReqNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:264
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
MachineInstr.h
llvm::MachineOperand::MO_Immediate
@ MO_Immediate
Immediate operand.
Definition: MachineOperand.h:52
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::NVPTXII::IsTexModeUnifiedFlag
@ IsTexModeUnifiedFlag
Definition: NVPTXBaseInfo.h:40
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::GlobalValue::hasCommonLinkage
bool hasCommonLinkage() const
Definition: GlobalValue.h:527
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
__CLK_ADDRESS_BASE
@ __CLK_ADDRESS_BASE
Definition: cl_common_defines.h:69
llvm::MachineLoopInfo::getLoopFor
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
Definition: MachineLoopInfo.h:126
llvm::NVPTXInstPrinter::getRegisterName
static const char * getRegisterName(unsigned RegNo)
llvm::MCOperand::createExpr
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
llvm::MCUnaryExpr::getOpcode
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:468
llvm::GlobalValue::hasExternalLinkage
bool hasExternalLinkage() const
Definition: GlobalValue.h:506
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:237
llvm::APFloatBase::IEEEsingle
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:201
DebugInfoMetadata.h
MCInstrDesc.h
llvm::MachineOperand::getGlobal
const GlobalValue * getGlobal() const
Definition: MachineOperand.h:572
llvm::MCOperand::createImm
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:76
llvm::Function
Definition: Function.h:60
llvm::Type::VoidTyID
@ VoidTyID
type with no size
Definition: Type.h:63
StringRef.h
llvm::NVPTXSubtarget::getTargetLowering
const NVPTXTargetLowering * getTargetLowering() const override
Definition: NVPTXSubtarget.h:64
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:629
llvm::isImageReadWrite
bool isImageReadWrite(const Value &val)
Definition: NVPTXUtilities.cpp:210
llvm::NVPTXTargetMachine::is64Bit
bool is64Bit() const
Definition: NVPTXTargetMachine.h:47
NativeFormatting.h
llvm::GlobalObject::getSection
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:111
llvm::AsmPrinter::MAI
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:90
name
static const char * name
Definition: SMEABIPass.cpp:49
llvm::MCConstantExpr::create
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
llvm::GlobalValue::hasWeakLinkage
bool hasWeakLinkage() const
Definition: GlobalValue.h:517
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
usedInGlobalVarDef
static bool usedInGlobalVarDef(const Constant *C)
Definition: NVPTXAsmPrinter.cpp:621
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
Path.h
llvm::ADDRESS_SPACE_LOCAL
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
llvm::Value::hasName
bool hasName() const
Definition: Value.h:261
ErrorHandling.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::NVPTXGenericMCSymbolRefExpr::create
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:54
DiscoverDependentGlobals
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
Definition: NVPTXAsmPrinter.cpp:100
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:682
usedInOneFunc
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
Definition: NVPTXAsmPrinter.cpp:637
llvm::MCUnaryExpr::Plus
@ Plus
Unary plus.
Definition: MCExpr.h:431
llvm::MCBinaryExpr::createAnd
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:530
llvm::MCBinaryExpr::Add
@ Add
Addition.
Definition: MCExpr.h:484
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
MachineBasicBlock.h
llvm::NVPTXSubtarget::getSmVersion
unsigned int getSmVersion() const
Definition: NVPTXSubtarget.h:81
llvm::AsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AsmPrinter.cpp:2075
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:127
llvm::MachineModuleInfo::hasDebugInfo
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition: MachineModuleInfo.h:182
APInt.h
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237
llvm::getMinCTASm
bool getMinCTASm(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:276
llvm::DenseMapIterator
Definition: DenseMap.h:57
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AsmPrinter::PrintSymbolOperand
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
Definition: AsmPrinterInlineAsm.cpp:457
DenseMap.h
Module.h
llvm::AttributeList
Definition: Attributes.h:431
llvm::MaybeAlign::valueOrOne
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:142
llvm::NVPTXFloatMCExpr::createConstantFPDouble
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:53
printOperand
static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value)
Definition: SelectionDAGDumper.cpp:958
llvm::isImage
bool isImage(const Value &val)
Definition: NVPTXUtilities.cpp:222
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:298
NVPTXMCAsmInfo.h
llvm::MCAsmInfo::getCodePointerSize
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
Definition: MCAsmInfo.h:550
ret
to esp esp setne al movzbw ax esp setg cl movzbw cx cmove cx cl jne LBB1_2 esp ret(also really horrible code on ppc). This is due to the expand code for 64-bit compares. GCC produces multiple branches
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:80
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:770
Operator.h
llvm::Register::index2VirtReg
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
NVPTXTargetStreamer.h
llvm::NVPTXII::IsSuldShift
@ IsSuldShift
Definition: NVPTXBaseInfo.h:37
llvm::MCBinaryExpr
Binary assembler expressions.
Definition: MCExpr.h:481
caller
int caller(int32 arg1, int32 arg2)
Definition: README.txt:681
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
llvm::format_hex_no_prefix
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:199
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MCUnaryExpr
Unary assembler expressions.
Definition: MCExpr.h:425
NVPTXRegisterInfo.h
ConstantFolding.h
llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:25
llvm::isTexture
bool isTexture(const Value &val)
Definition: NVPTXUtilities.cpp:143
llvm::MachineOperand::MO_Register
@ MO_Register
Register operand.
Definition: MachineOperand.h:51
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
NVPTX.h
MachineRegisterInfo.h
llvm::AsmPrinter::EmitToStreamer
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:403
llvm::isSampler
bool isSampler(const Value &val)
Definition: NVPTXUtilities.cpp:165
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::ComputeValueVTs
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:121
NVPTXUtilities.h
MachineValueType.h
llvm::MCInst::setOpcode
void setOpcode(unsigned Op)
Definition: MCInst.h:197
cl_common_defines.h
NVPTXTargetInfo.h
Instruction.h
llvm::MachineBasicBlock::getSymbol
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
Definition: MachineBasicBlock.cpp:58
CommandLine.h
llvm::NVPTXFloatMCExpr::createConstantFPSingle
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:48
llvm::NVPTXAsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
Definition: NVPTXAsmPrinter.cpp:486
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
bb
< i1 > br i1 label label bb bb
Definition: README.txt:978
llvm::getReqNTIDy
bool getReqNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:268
printMCExpr
static void printMCExpr(const MCExpr *E, raw_ostream &OS)
Definition: SystemZAsmParser.cpp:703
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:667
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:205
llvm::MCContext::getOrCreateSymbol
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:201
GlobalValue.h
MachineLoopInfo.h
llvm::ConstantDataSequential
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:570
TargetMachine.h
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:266
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:852
llvm::APInt::lshrInPlace
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:839
llvm::StringRef::startswith
bool startswith(StringRef Prefix) const
Definition: StringRef.h:260
llvm::GlobalValue::hasAppendingLinkage
bool hasAppendingLinkage() const
Definition: GlobalValue.h:520
Constants.h
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
SmallString.h
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::MachineOperand::MO_GlobalAddress
@ MO_GlobalAddress
Address of a global value.
Definition: MachineOperand.h:61
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:755
llvm::User
Definition: User.h:44
llvm::MCExpr::Target
@ Target
Target specific expression.
Definition: MCExpr.h:42
llvm::ADDRESS_SPACE_GLOBAL
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
llvm::AsmPrinter::OutStreamer
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:99
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
Twine.h
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::DICompileUnit::LineTablesOnly
@ LineTablesOnly
Definition: DebugInfoMetadata.h:1368
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::NVPTXII::IsSurfTexQueryFlag
@ IsSurfTexQueryFlag
Definition: NVPTXBaseInfo.h:39
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
MCSymbol.h
llvm::GlobalValue::hasPrivateLinkage
bool hasPrivateLinkage() const
Definition: GlobalValue.h:522
llvm::NVPTXSubtarget::hasMaskOperator
bool hasMaskOperator() const
Definition: NVPTXSubtarget.h:80
llvm::NVPTXSubtarget::getPTXVersion
unsigned getPTXVersion() const
Definition: NVPTXSubtarget.h:84
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:246
MCInst.h
DenseSet.h
llvm::MCBinaryExpr::getRHS
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:631
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::getSamplerName
std::string getSamplerName(const Value &val)
Definition: NVPTXUtilities.cpp:247
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::ConstantArray
ConstantArray - Constant Array Declarations.
Definition: Constants.h:410
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::size
size_type size() const
Definition: DenseSet.h:81
llvm::NVPTXTargetMachine::getManagedStrPool
ManagedStringPool * getManagedStrPool() const
Definition: NVPTXTargetMachine.h:50
llvm::getReqNTIDz
bool getReqNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:272
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::NVPTXSubtarget::getRegisterInfo
const NVPTXRegisterInfo * getRegisterInfo() const override
Definition: NVPTXSubtarget.h:61
llvm::ADDRESS_SPACE_CONST
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
llvm::AsmPrinter::CurrentFnSym
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:121
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1486
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
APFloat.h
llvm::MCSymbolRefExpr::getSymbol
const MCSymbol & getSymbol() const
Definition: MCExpr.h:399
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:186
llvm::StringRef::data
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:585
DebugLoc.h
llvm::promoteScalarArgumentSize
unsigned promoteScalarArgumentSize(unsigned size)
Definition: NVPTXUtilities.h:65
llvm::GlobalValue::hasInternalLinkage
bool hasInternalLinkage() const
Definition: GlobalValue.h:521
llvm::Type::PointerTyID
@ PointerTyID
Pointers.
Definition: Type.h:73
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
__CLK_FILTER_MASK
@ __CLK_FILTER_MASK
Definition: cl_common_defines.h:97
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1145
llvm::MCExpr::getKind
ExprKind getKind() const
Definition: MCExpr.h:81
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::DICompileUnit::DebugDirectivesOnly
@ DebugDirectivesOnly
Definition: DebugInfoMetadata.h:1369
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::Type::BFloatTyID
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition: Type.h:57
llvm::MCSymbol::print
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:58
llvm::MCUnaryExpr::Minus
@ Minus
Unary minus.
Definition: MCExpr.h:429
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::AsmPrinter::emitBasicBlockStart
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
Definition: AsmPrinter.cpp:3668
llvm::MachineLoopInfo::isLoopHeader
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
Definition: MachineLoopInfo.h:141
Type.h
llvm::MCExpr::Binary
@ Binary
Binary expressions.
Definition: MCExpr.h:38
llvm::DICompileUnit::FullDebug
@ FullDebug
Definition: DebugInfoMetadata.h:1367
NVPTXMachineFunctionInfo.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::getNVPTXRegClassStr
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Definition: NVPTXRegisterInfo.cpp:73
llvm::NVPTX::NVCL
@ NVCL
Definition: NVPTX.h:71
llvm::MCInst::addOperand
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
llvm::SmallString< 128 >
llvm::ManagedStringPool::getManagedString
std::string * getManagedString(const char *S)
Definition: ManagedStringPool.h:39
llvm::getTheNVPTXTarget64
Target & getTheNVPTXTarget64()
Definition: NVPTXTargetInfo.cpp:17
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::MCConstantExpr
Definition: MCExpr.h:144
llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:31
llvm::NVPTXFloatMCExpr::createConstantFPHalf
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:43
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::isKernelFunction
bool isKernelFunction(const Function &F)
Definition: NVPTXUtilities.cpp:284
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
llvm::NVPTXRegisterInfo::getName
const char * getName(unsigned RegNo) const
Definition: NVPTXRegisterInfo.h:52
BasicBlock.h
llvm::GlobalObject::hasSection
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:103
llvm::APFloat
Definition: APFloat.h:716
llvm::GlobalValue::hasAvailableExternallyLinkage
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:507
LLVMInitializeNVPTXAsmPrinter
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
Definition: NVPTXAsmPrinter.cpp:2231
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:264
llvm::GlobalValue
Definition: GlobalValue.h:44
NVPTXAsmPrinter.h
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
VI
@ VI
Definition: SIInstrInfo.cpp:7967
llvm::NVPTXSubtarget::hasImageHandles
bool hasImageHandles() const
Definition: NVPTXSubtarget.cpp:55
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
llvm::TargetRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
Definition: TargetRegisterInfo.h:771
DEPOTNAME
#define DEPOTNAME
Definition: NVPTXAsmPrinter.cpp:95
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:110
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:127
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::Type::isIntOrPtrTy
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:225
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::NVPTXAsmPrinter::doInitialization
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: NVPTXAsmPrinter.cpp:782
llvm::DenseMap< unsigned, unsigned >
DebugInfo.h
NVPTXBaseInfo.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
Analysis.h
StringExtras.h
isEmptyXXStructor
static bool isEmptyXXStructor(GlobalVariable *GV)
Definition: NVPTXAsmPrinter.cpp:761
llvm::DICompileUnit
Compile unit.
Definition: DebugInfoMetadata.h:1360
llvm::ADDRESS_SPACE_SHARED
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
useFuncSeen
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
Definition: NVPTXAsmPrinter.cpp:686
llvm::MachineOperand::getType
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
Definition: MachineOperand.h:218
printMemOperand
static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, const MachineFunction *MF, const Module *M, const MachineFrameInfo *MFI, const TargetInstrInfo *TII, LLVMContext &Ctx)
Definition: SelectionDAGDumper.cpp:523
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
llvm::MachineOperand::getFPImm
const ConstantFP * getFPImm() const
Definition: MachineOperand.h:556
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AsmPrinter::getSymbol
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:656
llvm::NVPTXMachineFunctionInfo::getImageHandleSymbol
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
Definition: NVPTXMachineFunctionInfo.h:50
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:673
__CLK_FILTER_BASE
@ __CLK_FILTER_BASE
Definition: cl_common_defines.h:93
llvm::AsmPrinter::MF
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:102
llvm::AsmPrinter::OutContext
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:94
MachineModuleInfo.h
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::MachineOperand::MO_FPImmediate
@ MO_FPImmediate
Floating-point immediate operand.
Definition: MachineOperand.h:54
__CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_BASE
Definition: cl_common_defines.h:85
VisitGlobalVariableForEmission
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
Definition: NVPTXAsmPrinter.cpp:117
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:386
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::MCSymbolRefExpr
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:257
Triple.h
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1715
llvm::AsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:387
llvm::write_hex
void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, Optional< size_t > Width=None)
Definition: NativeFormatting.cpp:138
llvm::NVPTXTargetStreamer::closeLastSection
void closeLastSection()
Close last section.
Definition: NVPTXTargetStreamer.cpp:36
llvm::MCBinaryExpr::createAdd
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:525
llvm::NVPTXAsmPrinter::getVirtualRegisterName
std::string getVirtualRegisterName(unsigned) const
Definition: NVPTXAsmPrinter.cpp:583
llvm::DICompileUnit::NoDebug
@ NoDebug
Definition: DebugInfoMetadata.h:1366
llvm::AsmPrinter::GetExternalSymbolSymbol
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
Definition: AsmPrinter.cpp:3596
llvm::MCUnaryExpr::getSubExpr
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:471
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:561
DataLayout.h
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::APFloatBase::IEEEdouble
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:204
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
GetSymbolRef
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, HexagonAsmPrinter &Printer, bool MustExtend)
Definition: HexagonMCInstLower.cpp:41
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::MCOperand::createReg
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
TargetLoweringObjectFile.h
A
* A
Definition: README_ALTIVEC.txt:89
llvm::MachineOperand::MO_MachineBasicBlock
@ MO_MachineBasicBlock
MachineBasicBlock reference.
Definition: MachineOperand.h:55
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ConstantExpr::getIntegerCast
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:2040
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::HexPrintStyle::PrefixUpper
@ PrefixUpper
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::MachineFrameInfo::getMaxAlign
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Definition: MachineFrameInfo.h:601
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::AsmPrinter::MMI
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:105
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::Type::IntegerTyID
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Attributes.h
canDemoteGlobalVar
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
Definition: NVPTXAsmPrinter.cpp:668
llvm::NVPTXTargetStreamer
Implements NVPTX-specific streamer.
Definition: NVPTXTargetStreamer.h:18
llvm::NVPTXII::IsSuldMask
@ IsSuldMask
Definition: NVPTXBaseInfo.h:36
j
return j(j<< 16)
Constant.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::MCBinaryExpr::getOpcode
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:625
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:972
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::getMaxNTIDz
bool getMaxNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:260
llvm::NVPTX::CUDA
@ CUDA
Definition: NVPTX.h:72
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:653
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::AsmPrinter::getSubtargetInfo
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:398
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::Type::FixedVectorTyID
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
llvm::MachineOperand::MO_ExternalSymbol
@ MO_ExternalSymbol
Name of external global symbol.
Definition: MachineOperand.h:60
GlobalVariable.h
llvm::isSurface
bool isSurface(const Value &val)
Definition: NVPTXUtilities.cpp:154
llvm::MCUnaryExpr::LNot
@ LNot
Logical negation.
Definition: MCExpr.h:428
Casting.h
llvm::NVPTXTargetMachine::getSubtargetImpl
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition: NVPTXTargetMachine.h:43
NVPTXTargetMachine.h
Function.h
__CLK_ADDRESS_MASK
@ __CLK_ADDRESS_MASK
Definition: cl_common_defines.h:79
llvm::DenseMapBase::size
unsigned size() const
Definition: DenseMap.h:99
llvm::APInt::getLoBits
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:605
llvm::LLVMTargetMachine
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Definition: TargetMachine.h:408
llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:386
llvm::TargetRegisterInfo::getNumRegClasses
unsigned getNumRegClasses() const
Definition: TargetRegisterInfo.h:765
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:322
llvm::getAlign
bool getAlign(const Function &F, unsigned index, unsigned &align)
Definition: NVPTXUtilities.cpp:294
llvm::getSurfaceName
std::string getSurfaceName(const Value &val)
Definition: NVPTXUtilities.cpp:242
llvm::ARMBuildAttrs::Symbol
@ Symbol
Definition: ARMBuildAttributes.h:83
llvm::getMaxNTIDy
bool getMaxNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:256
llvm::isImageWriteOnly
bool isImageWriteOnly(const Value &val)
Definition: NVPTXUtilities.cpp:198
instr
@ instr
Definition: HWAddressSanitizer.cpp:192
llvm::Type::StructTyID
@ StructTyID
Structures.
Definition: Type.h:74
llvm::LegalityPredicates::isVector
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Definition: LegalityPredicates.cpp:73
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:106
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:200
llvm::clearAnnotationCache
void clearAnnotationCache(const Module *Mod)
Definition: NVPTXUtilities.cpp:46
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5084
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::AsmPrinter::TM
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:87
llvm::support::endian::read64le
uint64_t read64le(const void *P)
Definition: Endian.h:382
SmallVector.h
llvm::NVPTXSubtarget::getTargetName
std::string getTargetName() const
Definition: NVPTXSubtarget.h:82
llvm::support::endian::read32le
uint32_t read32le(const void *P)
Definition: Endian.h:381
User.h
llvm::MCExpr::SymbolRef
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
__CLK_NORMALIZED_MASK
@ __CLK_NORMALIZED_MASK
Definition: cl_common_defines.h:88
llvm::MachineOperand::getSymbolName
const char * getSymbolName() const
Definition: MachineOperand.h:617
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:163
llvm::MCExpr::Unary
@ Unary
Unary expressions.
Definition: MCExpr.h:41
MCStreamer.h
UnrollLoop.h
llvm::MCExpr::Constant
@ Constant
Constant expressions.
Definition: MCExpr.h:39
llvm::raw_svector_ostream
A raw_ostream that writes to a SmallVector or SmallString.
Definition: raw_ostream.h:659
NVPTXSubtarget.h
llvm::NVPTXMachineFunctionInfo
Definition: NVPTXMachineFunctionInfo.h:20
llvm::AsmPrinter::getDataLayout
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:388
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::erase
bool erase(const ValueT &V)
Definition: DenseSet.h:101
llvm::GlobalValue::hasLinkOnceLinkage
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:510
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::ConstantFoldConstant
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
Definition: ConstantFolding.cpp:1207
MachineOperand.h
llvm::NVPTXII::IsTexFlag
@ IsTexFlag
Definition: NVPTXBaseInfo.h:35
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:290
DerivedTypes.h
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:292
llvm::MCOperand
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
llvm::NVPTXAsmPrinter::getFunctionFrameSymbol
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not register based.
Definition: NVPTXAsmPrinter.cpp:508
llvm::AsmPrinter::emitInitialRawDwarfLocDirective
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:407
llvm::MCSymbolRefExpr::VK_None
@ VK_None
Definition: MCExpr.h:195
llvm::Type::isAggregateType
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:276
LLVMContext.h
llvm::NVPTXTargetMachine::getDrvInterface
NVPTX::DrvInterface getDrvInterface() const
Definition: NVPTXTargetMachine.h:49
llvm::RegisterAsmPrinter
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...
Definition: TargetRegistry.h:1419
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::getMaxNTIDx
bool getMaxNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:252
llvm::getTextureName
std::string getTextureName(const Value &val)
Definition: NVPTXUtilities.cpp:237
raw_ostream.h
llvm::getMaxNReg
bool getMaxNReg(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:280
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
MachineFunction.h
Endian.h
llvm::getNVPTXRegClassName
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
Definition: NVPTXRegisterInfo.cpp:29
TargetRegistry.h
MCExpr.h
llvm::isManaged
bool isManaged(const Value &val)
Definition: NVPTXUtilities.cpp:226
CU
Definition: AArch64AsmBackend.cpp:504
llvm::AsmPrinter::getFunctionNumber
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:380
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
TargetRegisterInfo.h
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
NVPTXMCExpr.h
llvm::Type::HalfTyID
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::AsmPrinter::PrintAsmOperand
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AsmPrinterInlineAsm.cpp:467
llvm::MCUnaryExpr::Not
@ Not
Bitwise negation.
Definition: MCExpr.h:430
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:164
llvm::MCBinaryExpr::getLHS
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:628
llvm::AsmPrinter::doInitialization
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:432
llvm::NVPTXII::IsSustFlag
@ IsSustFlag
Definition: NVPTXBaseInfo.h:38
llvm::getTheNVPTXTarget32
Target & getTheNVPTXTarget32()
Definition: NVPTXTargetInfo.cpp:13