LLVM  13.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to NVPTX assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXAsmPrinter.h"
19 #include "NVPTX.h"
20 #include "NVPTXMCExpr.h"
22 #include "NVPTXRegisterInfo.h"
23 #include "NVPTXSubtarget.h"
24 #include "NVPTXTargetMachine.h"
25 #include "NVPTXUtilities.h"
27 #include "cl_common_defines.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/DenseSet.h"
32 #include "llvm/ADT/SmallString.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/StringExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/Triple.h"
37 #include "llvm/ADT/Twine.h"
39 #include "llvm/CodeGen/Analysis.h"
51 #include "llvm/IR/Attributes.h"
52 #include "llvm/IR/BasicBlock.h"
53 #include "llvm/IR/Constant.h"
54 #include "llvm/IR/Constants.h"
55 #include "llvm/IR/DataLayout.h"
56 #include "llvm/IR/DebugInfo.h"
58 #include "llvm/IR/DebugLoc.h"
59 #include "llvm/IR/DerivedTypes.h"
60 #include "llvm/IR/Function.h"
61 #include "llvm/IR/GlobalValue.h"
62 #include "llvm/IR/GlobalVariable.h"
63 #include "llvm/IR/Instruction.h"
64 #include "llvm/IR/LLVMContext.h"
65 #include "llvm/IR/Module.h"
66 #include "llvm/IR/Operator.h"
67 #include "llvm/IR/Type.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/MC/MCExpr.h"
70 #include "llvm/MC/MCInst.h"
71 #include "llvm/MC/MCInstrDesc.h"
72 #include "llvm/MC/MCStreamer.h"
73 #include "llvm/MC/MCSymbol.h"
74 #include "llvm/Support/Casting.h"
78 #include "llvm/Support/Path.h"
84 #include <cassert>
85 #include <cstdint>
86 #include <cstring>
87 #include <new>
88 #include <string>
89 #include <utility>
90 #include <vector>
91 
92 using namespace llvm;
93 
94 #define DEPOTNAME "__local_depot"
95 
96 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
97 /// depends.
98 static void
101  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
102  Globals.insert(GV);
103  else {
104  if (const User *U = dyn_cast<User>(V)) {
105  for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
106  DiscoverDependentGlobals(U->getOperand(i), Globals);
107  }
108  }
109  }
110 }
111 
112 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
113 /// instances to be emitted, but only after any dependents have been added
114 /// first.
115 static void
120  // Have we already visited this one?
121  if (Visited.count(GV))
122  return;
123 
124  // Do we have a circular dependency?
125  if (!Visiting.insert(GV).second)
126  report_fatal_error("Circular dependency found in global variable set");
127 
128  // Make sure we visit all dependents first
130  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
131  DiscoverDependentGlobals(GV->getOperand(i), Others);
132 
134  E = Others.end();
135  I != E; ++I)
136  VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
137 
138  // Now we can visit ourself
139  Order.push_back(GV);
140  Visited.insert(GV);
141  Visiting.erase(GV);
142 }
143 
144 void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
145  MCInst Inst;
146  lowerToMCInst(MI, Inst);
147  EmitToStreamer(*OutStreamer, Inst);
148 }
149 
150 // Handle symbol backtracking for targets that do not support image handles
151 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
152  unsigned OpNo, MCOperand &MCOp) {
153  const MachineOperand &MO = MI->getOperand(OpNo);
154  const MCInstrDesc &MCID = MI->getDesc();
155 
156  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
157  // This is a texture fetch, so operand 4 is a texref and operand 5 is
158  // a samplerref
159  if (OpNo == 4 && MO.isImm()) {
160  lowerImageHandleSymbol(MO.getImm(), MCOp);
161  return true;
162  }
163  if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
164  lowerImageHandleSymbol(MO.getImm(), MCOp);
165  return true;
166  }
167 
168  return false;
169  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
170  unsigned VecSize =
171  1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
172 
173  // For a surface load of vector size N, the Nth operand will be the surfref
174  if (OpNo == VecSize && MO.isImm()) {
175  lowerImageHandleSymbol(MO.getImm(), MCOp);
176  return true;
177  }
178 
179  return false;
180  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
181  // This is a surface store, so operand 0 is a surfref
182  if (OpNo == 0 && MO.isImm()) {
183  lowerImageHandleSymbol(MO.getImm(), MCOp);
184  return true;
185  }
186 
187  return false;
188  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
189  // This is a query, so operand 1 is a surfref/texref
190  if (OpNo == 1 && MO.isImm()) {
191  lowerImageHandleSymbol(MO.getImm(), MCOp);
192  return true;
193  }
194 
195  return false;
196  }
197 
198  return false;
199 }
200 
201 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
202  // Ewwww
203  LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
204  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
206  const char *Sym = MFI->getImageHandleSymbol(Index);
207  std::string *SymNamePtr =
208  nvTM.getManagedStrPool()->getManagedString(Sym);
209  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
210 }
211 
212 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
213  OutMI.setOpcode(MI->getOpcode());
214  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
215  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
216  const MachineOperand &MO = MI->getOperand(0);
217  OutMI.addOperand(GetSymbolRef(
219  return;
220  }
221 
222  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
223  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
224  const MachineOperand &MO = MI->getOperand(i);
225 
226  MCOperand MCOp;
227  if (!STI.hasImageHandles()) {
228  if (lowerImageHandleOperand(MI, i, MCOp)) {
229  OutMI.addOperand(MCOp);
230  continue;
231  }
232  }
233 
234  if (lowerOperand(MO, MCOp))
235  OutMI.addOperand(MCOp);
236  }
237 }
238 
239 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
240  MCOperand &MCOp) {
241  switch (MO.getType()) {
242  default: llvm_unreachable("unknown operand type");
244  MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
245  break;
247  MCOp = MCOperand::createImm(MO.getImm());
248  break;
251  MO.getMBB()->getSymbol(), OutContext));
252  break;
255  break;
257  MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
258  break;
260  const ConstantFP *Cnt = MO.getFPImm();
261  const APFloat &Val = Cnt->getValueAPF();
262 
263  switch (Cnt->getType()->getTypeID()) {
264  default: report_fatal_error("Unsupported FP type"); break;
265  case Type::HalfTyID:
266  MCOp = MCOperand::createExpr(
268  break;
269  case Type::FloatTyID:
270  MCOp = MCOperand::createExpr(
272  break;
273  case Type::DoubleTyID:
274  MCOp = MCOperand::createExpr(
276  break;
277  }
278  break;
279  }
280  }
281  return true;
282 }
283 
284 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
286  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
287 
288  DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
289  unsigned RegNum = RegMap[Reg];
290 
291  // Encode the register class in the upper 4 bits
292  // Must be kept in sync with NVPTXInstPrinter::printRegName
293  unsigned Ret = 0;
294  if (RC == &NVPTX::Int1RegsRegClass) {
295  Ret = (1 << 28);
296  } else if (RC == &NVPTX::Int16RegsRegClass) {
297  Ret = (2 << 28);
298  } else if (RC == &NVPTX::Int32RegsRegClass) {
299  Ret = (3 << 28);
300  } else if (RC == &NVPTX::Int64RegsRegClass) {
301  Ret = (4 << 28);
302  } else if (RC == &NVPTX::Float32RegsRegClass) {
303  Ret = (5 << 28);
304  } else if (RC == &NVPTX::Float64RegsRegClass) {
305  Ret = (6 << 28);
306  } else if (RC == &NVPTX::Float16RegsRegClass) {
307  Ret = (7 << 28);
308  } else if (RC == &NVPTX::Float16x2RegsRegClass) {
309  Ret = (8 << 28);
310  } else {
311  report_fatal_error("Bad register class");
312  }
313 
314  // Insert the vreg number
315  Ret |= (RegNum & 0x0FFFFFFF);
316  return Ret;
317  } else {
318  // Some special-use registers are actually physical registers.
319  // Encode this as the register class ID of 0 and the real register ID.
320  return Reg & 0x0FFFFFFF;
321  }
322 }
323 
325  const MCExpr *Expr;
327  OutContext);
328  return MCOperand::createExpr(Expr);
329 }
330 
331 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
332  const DataLayout &DL = getDataLayout();
333  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
334  const TargetLowering *TLI = STI.getTargetLowering();
335 
336  Type *Ty = F->getReturnType();
337 
338  bool isABI = (STI.getSmVersion() >= 20);
339 
340  if (Ty->getTypeID() == Type::VoidTyID)
341  return;
342 
343  O << " (";
344 
345  if (isABI) {
346  if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
347  unsigned size = 0;
348  if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
349  size = ITy->getBitWidth();
350  } else {
351  assert(Ty->isFloatingPointTy() && "Floating point type expected here");
353  }
354  // PTX ABI requires all scalar return values to be at least 32
355  // bits in size. fp16 normally uses .b16 as its storage type in
356  // PTX, so its size must be adjusted here, too.
357  if (size < 32)
358  size = 32;
359 
360  O << ".param .b" << size << " func_retval0";
361  } else if (isa<PointerType>(Ty)) {
362  O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
363  << " func_retval0";
364  } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
365  unsigned totalsz = DL.getTypeAllocSize(Ty);
366  unsigned retAlignment = 0;
367  if (!getAlign(*F, 0, retAlignment))
368  retAlignment = DL.getABITypeAlignment(Ty);
369  O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
370  << "]";
371  } else
372  llvm_unreachable("Unknown return type");
373  } else {
374  SmallVector<EVT, 16> vtparts;
375  ComputeValueVTs(*TLI, DL, Ty, vtparts);
376  unsigned idx = 0;
377  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
378  unsigned elems = 1;
379  EVT elemtype = vtparts[i];
380  if (vtparts[i].isVector()) {
381  elems = vtparts[i].getVectorNumElements();
382  elemtype = vtparts[i].getVectorElementType();
383  }
384 
385  for (unsigned j = 0, je = elems; j != je; ++j) {
386  unsigned sz = elemtype.getSizeInBits();
387  if (elemtype.isInteger() && (sz < 32))
388  sz = 32;
389  O << ".reg .b" << sz << " func_retval" << idx;
390  if (j < je - 1)
391  O << ", ";
392  ++idx;
393  }
394  if (i < e - 1)
395  O << ", ";
396  }
397  }
398  O << ") ";
399 }
400 
401 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
402  raw_ostream &O) {
403  const Function &F = MF.getFunction();
404  printReturnValStr(&F, O);
405 }
406 
407 // Return true if MBB is the header of a loop marked with
408 // llvm.loop.unroll.disable.
409 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
410 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
411  const MachineBasicBlock &MBB) const {
412  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
413  // We insert .pragma "nounroll" only to the loop header.
414  if (!LI.isLoopHeader(&MBB))
415  return false;
416 
417  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
418  // we iterate through each back edge of the loop with header MBB, and check
419  // whether its metadata contains llvm.loop.unroll.disable.
420  for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
421  const MachineBasicBlock *PMBB = *I;
422  if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
423  // Edges from other loops to MBB are not back edges.
424  continue;
425  }
426  if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
427  if (MDNode *LoopID =
428  PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
429  if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
430  return true;
431  }
432  }
433  }
434  return false;
435 }
436 
437 void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
439  if (isLoopHeaderOfNoUnroll(MBB))
440  OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
441 }
442 
443 void NVPTXAsmPrinter::emitFunctionEntryLabel() {
444  SmallString<128> Str;
445  raw_svector_ostream O(Str);
446 
447  if (!GlobalsEmitted) {
448  emitGlobals(*MF->getFunction().getParent());
449  GlobalsEmitted = true;
450  }
451 
452  // Set up
453  MRI = &MF->getRegInfo();
454  F = &MF->getFunction();
455  emitLinkageDirective(F, O);
456  if (isKernelFunction(*F))
457  O << ".entry ";
458  else {
459  O << ".func ";
460  printReturnValStr(*MF, O);
461  }
462 
463  CurrentFnSym->print(O, MAI);
464 
465  emitFunctionParamList(*MF, O);
466 
467  if (isKernelFunction(*F))
468  emitKernelFunctionDirectives(*F, O);
469 
470  OutStreamer->emitRawText(O.str());
471 
472  VRegMapping.clear();
473  // Emit open brace for function body.
474  OutStreamer->emitRawText(StringRef("{\n"));
475  setAndEmitFunctionVirtualRegisters(*MF);
476  // Emit initial .loc debug directive for correct relocation symbol data.
477  if (MMI && MMI->hasDebugInfo())
479 }
480 
482  bool Result = AsmPrinter::runOnMachineFunction(F);
483  // Emit closing brace for the body of function F.
484  // The closing brace must be emitted here because we need to emit additional
485  // debug labels/data after the last basic block.
486  // We need to emit the closing brace here because we don't have function that
487  // finished emission of the function body.
488  OutStreamer->emitRawText(StringRef("}\n"));
489  return Result;
490 }
491 
492 void NVPTXAsmPrinter::emitFunctionBodyStart() {
493  SmallString<128> Str;
494  raw_svector_ostream O(Str);
495  emitDemotedVars(&MF->getFunction(), O);
496  OutStreamer->emitRawText(O.str());
497 }
498 
499 void NVPTXAsmPrinter::emitFunctionBodyEnd() {
500  VRegMapping.clear();
501 }
502 
504  SmallString<128> Str;
506  return OutContext.getOrCreateSymbol(Str);
507 }
508 
509 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
510  Register RegNo = MI->getOperand(0).getReg();
511  if (Register::isVirtualRegister(RegNo)) {
512  OutStreamer->AddComment(Twine("implicit-def: ") +
513  getVirtualRegisterName(RegNo));
514  } else {
515  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
516  OutStreamer->AddComment(Twine("implicit-def: ") +
517  STI.getRegisterInfo()->getName(RegNo));
518  }
519  OutStreamer->AddBlankLine();
520 }
521 
522 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
523  raw_ostream &O) const {
524  // If the NVVM IR has some of reqntid* specified, then output
525  // the reqntid directive, and set the unspecified ones to 1.
526  // If none of reqntid* is specified, don't output reqntid directive.
527  unsigned reqntidx, reqntidy, reqntidz;
528  bool specified = false;
529  if (!getReqNTIDx(F, reqntidx))
530  reqntidx = 1;
531  else
532  specified = true;
533  if (!getReqNTIDy(F, reqntidy))
534  reqntidy = 1;
535  else
536  specified = true;
537  if (!getReqNTIDz(F, reqntidz))
538  reqntidz = 1;
539  else
540  specified = true;
541 
542  if (specified)
543  O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
544  << "\n";
545 
546  // If the NVVM IR has some of maxntid* specified, then output
547  // the maxntid directive, and set the unspecified ones to 1.
548  // If none of maxntid* is specified, don't output maxntid directive.
549  unsigned maxntidx, maxntidy, maxntidz;
550  specified = false;
551  if (!getMaxNTIDx(F, maxntidx))
552  maxntidx = 1;
553  else
554  specified = true;
555  if (!getMaxNTIDy(F, maxntidy))
556  maxntidy = 1;
557  else
558  specified = true;
559  if (!getMaxNTIDz(F, maxntidz))
560  maxntidz = 1;
561  else
562  specified = true;
563 
564  if (specified)
565  O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
566  << "\n";
567 
568  unsigned mincta;
569  if (getMinCTASm(F, mincta))
570  O << ".minnctapersm " << mincta << "\n";
571 
572  unsigned maxnreg;
573  if (getMaxNReg(F, maxnreg))
574  O << ".maxnreg " << maxnreg << "\n";
575 }
576 
577 std::string
579  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
580 
581  std::string Name;
582  raw_string_ostream NameStr(Name);
583 
584  VRegRCMap::const_iterator I = VRegMapping.find(RC);
585  assert(I != VRegMapping.end() && "Bad register class");
586  const DenseMap<unsigned, unsigned> &RegMap = I->second;
587 
589  assert(VI != RegMap.end() && "Bad virtual register");
590  unsigned MappedVR = VI->second;
591 
592  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
593 
594  NameStr.flush();
595  return Name;
596 }
597 
598 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
599  raw_ostream &O) {
600  O << getVirtualRegisterName(vr);
601 }
602 
603 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
604  emitLinkageDirective(F, O);
605  if (isKernelFunction(*F))
606  O << ".entry ";
607  else
608  O << ".func ";
609  printReturnValStr(F, O);
610  getSymbol(F)->print(O, MAI);
611  O << "\n";
612  emitFunctionParamList(F, O);
613  O << ";\n";
614 }
615 
616 static bool usedInGlobalVarDef(const Constant *C) {
617  if (!C)
618  return false;
619 
620  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
621  return GV->getName() != "llvm.used";
622  }
623 
624  for (const User *U : C->users())
625  if (const Constant *C = dyn_cast<Constant>(U))
626  if (usedInGlobalVarDef(C))
627  return true;
628 
629  return false;
630 }
631 
632 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
633  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
634  if (othergv->getName() == "llvm.used")
635  return true;
636  }
637 
638  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
639  if (instr->getParent() && instr->getParent()->getParent()) {
640  const Function *curFunc = instr->getParent()->getParent();
641  if (oneFunc && (curFunc != oneFunc))
642  return false;
643  oneFunc = curFunc;
644  return true;
645  } else
646  return false;
647  }
648 
649  for (const User *UU : U->users())
650  if (!usedInOneFunc(UU, oneFunc))
651  return false;
652 
653  return true;
654 }
655 
656 /* Find out if a global variable can be demoted to local scope.
657  * Currently, this is valid for CUDA shared variables, which have local
658  * scope and global lifetime. So the conditions to check are :
659  * 1. Is the global variable in shared address space?
660  * 2. Does it have internal linkage?
661  * 3. Is the global variable referenced only in one function?
662  */
663 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
664  if (!gv->hasInternalLinkage())
665  return false;
666  PointerType *Pty = gv->getType();
668  return false;
669 
670  const Function *oneFunc = nullptr;
671 
672  bool flag = usedInOneFunc(gv, oneFunc);
673  if (!flag)
674  return false;
675  if (!oneFunc)
676  return false;
677  f = oneFunc;
678  return true;
679 }
680 
681 static bool useFuncSeen(const Constant *C,
683  for (const User *U : C->users()) {
684  if (const Constant *cu = dyn_cast<Constant>(U)) {
685  if (useFuncSeen(cu, seenMap))
686  return true;
687  } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
688  const BasicBlock *bb = I->getParent();
689  if (!bb)
690  continue;
691  const Function *caller = bb->getParent();
692  if (!caller)
693  continue;
694  if (seenMap.find(caller) != seenMap.end())
695  return true;
696  }
697  }
698  return false;
699 }
700 
701 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
703  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
704  const Function *F = &*FI;
705 
706  if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
707  emitDeclaration(F, O);
708  continue;
709  }
710 
711  if (F->isDeclaration()) {
712  if (F->use_empty())
713  continue;
714  if (F->getIntrinsicID())
715  continue;
716  emitDeclaration(F, O);
717  continue;
718  }
719  for (const User *U : F->users()) {
720  if (const Constant *C = dyn_cast<Constant>(U)) {
721  if (usedInGlobalVarDef(C)) {
722  // The use is in the initialization of a global variable
723  // that is a function pointer, so print a declaration
724  // for the original function
725  emitDeclaration(F, O);
726  break;
727  }
728  // Emit a declaration of this function if the function that
729  // uses this constant expr has already been seen.
730  if (useFuncSeen(C, seenMap)) {
731  emitDeclaration(F, O);
732  break;
733  }
734  }
735 
736  if (!isa<Instruction>(U))
737  continue;
738  const Instruction *instr = cast<Instruction>(U);
739  const BasicBlock *bb = instr->getParent();
740  if (!bb)
741  continue;
742  const Function *caller = bb->getParent();
743  if (!caller)
744  continue;
745 
746  // If a caller has already been seen, then the caller is
747  // appearing in the module before the callee. so print out
748  // a declaration for the callee.
749  if (seenMap.find(caller) != seenMap.end()) {
750  emitDeclaration(F, O);
751  break;
752  }
753  }
754  seenMap[F] = true;
755  }
756 }
757 
759  if (!GV) return true;
760  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
761  if (!InitList) return true; // Not an array; we don't know how to parse.
762  return InitList->getNumOperands() == 0;
763 }
764 
765 void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
766  // Construct a default subtarget off of the TargetMachine defaults. The
767  // rest of NVPTX isn't friendly to change subtargets per function and
768  // so the default TargetMachine will have all of the options.
769  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
770  const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
771  SmallString<128> Str1;
772  raw_svector_ostream OS1(Str1);
773 
774  // Emit header before any dwarf directives are emitted below.
775  emitHeader(M, OS1, *STI);
776  OutStreamer->emitRawText(OS1.str());
777 }
778 
780  if (M.alias_size()) {
781  report_fatal_error("Module has aliases, which NVPTX does not support.");
782  return true; // error
783  }
784  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
786  "Module has a nontrivial global ctor, which NVPTX does not support.");
787  return true; // error
788  }
789  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
791  "Module has a nontrivial global dtor, which NVPTX does not support.");
792  return true; // error
793  }
794 
795  // We need to call the parent's one explicitly.
796  bool Result = AsmPrinter::doInitialization(M);
797 
798  GlobalsEmitted = false;
799 
800  return Result;
801 }
802 
803 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
804  SmallString<128> Str2;
805  raw_svector_ostream OS2(Str2);
806 
807  emitDeclarations(M, OS2);
808 
809  // As ptxas does not support forward references of globals, we need to first
810  // sort the list of module-level globals in def-use order. We visit each
811  // global variable in order, and ensure that we emit it *after* its dependent
812  // globals. We use a little extra memory maintaining both a set and a list to
813  // have fast searches while maintaining a strict ordering.
817 
818  // Visit each global variable, in order
819  for (const GlobalVariable &I : M.globals())
820  VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
821 
822  assert(GVVisited.size() == M.getGlobalList().size() &&
823  "Missed a global variable");
824  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
825 
826  // Print out module-level global variables in proper order
827  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
828  printModuleLevelGV(Globals[i], OS2);
829 
830  OS2 << '\n';
831 
832  OutStreamer->emitRawText(OS2.str());
833 }
834 
835 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
836  const NVPTXSubtarget &STI) {
837  O << "//\n";
838  O << "// Generated by LLVM NVPTX Back-End\n";
839  O << "//\n";
840  O << "\n";
841 
842  unsigned PTXVersion = STI.getPTXVersion();
843  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
844 
845  O << ".target ";
846  O << STI.getTargetName();
847 
848  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
849  if (NTM.getDrvInterface() == NVPTX::NVCL)
850  O << ", texmode_independent";
851 
852  bool HasFullDebugInfo = false;
853  for (DICompileUnit *CU : M.debug_compile_units()) {
854  switch(CU->getEmissionKind()) {
857  break;
860  HasFullDebugInfo = true;
861  break;
862  }
863  if (HasFullDebugInfo)
864  break;
865  }
866  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
867  O << ", debug";
868 
869  O << "\n";
870 
871  O << ".address_size ";
872  if (NTM.is64Bit())
873  O << "64";
874  else
875  O << "32";
876  O << "\n";
877 
878  O << "\n";
879 }
880 
882  bool HasDebugInfo = MMI && MMI->hasDebugInfo();
883 
884  // If we did not emit any functions, then the global declarations have not
885  // yet been emitted.
886  if (!GlobalsEmitted) {
887  emitGlobals(M);
888  GlobalsEmitted = true;
889  }
890 
891  // XXX Temporarily remove global variables so that doFinalization() will not
892  // emit them again (global variables are emitted at beginning).
893 
894  Module::GlobalListType &global_list = M.getGlobalList();
895  int i, n = global_list.size();
896  GlobalVariable **gv_array = new GlobalVariable *[n];
897 
898  // first, back-up GlobalVariable in gv_array
899  i = 0;
900  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
901  I != E; ++I)
902  gv_array[i++] = &*I;
903 
904  // second, empty global_list
905  while (!global_list.empty())
906  global_list.remove(global_list.begin());
907 
908  // call doFinalization
910 
911  // now we restore global variables
912  for (i = 0; i < n; i++)
913  global_list.insert(global_list.end(), gv_array[i]);
914 
916 
917  delete[] gv_array;
918  // Close the last emitted section
919  if (HasDebugInfo) {
920  static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
921  ->closeLastSection();
922  // Emit empty .debug_loc section for better support of the empty files.
923  OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
924  }
925 
926  // Output last DWARF .file directives, if any.
927  static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
928  ->outputDwarfFileDirectives();
929 
930  return ret;
931 
932  //bool Result = AsmPrinter::doFinalization(M);
933  // Instead of calling the parents doFinalization, we may
934  // clone parents doFinalization and customize here.
935  // Currently, we if NVISA out the EmitGlobals() in
936  // parent's doFinalization, which is too intrusive.
937  //
938  // Same for the doInitialization.
939  //return Result;
940 }
941 
942 // This function emits appropriate linkage directives for
943 // functions and global variables.
944 //
945 // extern function declaration -> .extern
946 // extern function definition -> .visible
947 // external global variable with init -> .visible
948 // external without init -> .extern
949 // appending -> not allowed, assert.
950 // for any linkage other than
951 // internal, private, linker_private,
952 // linker_private_weak, linker_private_weak_def_auto,
953 // we emit -> .weak.
954 
955 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
956  raw_ostream &O) {
957  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
958  if (V->hasExternalLinkage()) {
959  if (isa<GlobalVariable>(V)) {
960  const GlobalVariable *GVar = cast<GlobalVariable>(V);
961  if (GVar) {
962  if (GVar->hasInitializer())
963  O << ".visible ";
964  else
965  O << ".extern ";
966  }
967  } else if (V->isDeclaration())
968  O << ".extern ";
969  else
970  O << ".visible ";
971  } else if (V->hasAppendingLinkage()) {
972  std::string msg;
973  msg.append("Error: ");
974  msg.append("Symbol ");
975  if (V->hasName())
976  msg.append(std::string(V->getName()));
977  msg.append("has unsupported appending linkage type");
978  llvm_unreachable(msg.c_str());
979  } else if (!V->hasInternalLinkage() &&
980  !V->hasPrivateLinkage()) {
981  O << ".weak ";
982  }
983  }
984 }
985 
986 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
987  raw_ostream &O,
988  bool processDemoted) {
989  // Skip meta data
990  if (GVar->hasSection()) {
991  if (GVar->getSection() == "llvm.metadata")
992  return;
993  }
994 
995  // Skip LLVM intrinsic global variables
996  if (GVar->getName().startswith("llvm.") ||
997  GVar->getName().startswith("nvvm."))
998  return;
999 
1000  const DataLayout &DL = getDataLayout();
1001 
1002  // GlobalVariables are always constant pointers themselves.
1003  PointerType *PTy = GVar->getType();
1004  Type *ETy = GVar->getValueType();
1005 
1006  if (GVar->hasExternalLinkage()) {
1007  if (GVar->hasInitializer())
1008  O << ".visible ";
1009  else
1010  O << ".extern ";
1011  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1013  GVar->hasCommonLinkage()) {
1014  O << ".weak ";
1015  }
1016 
1017  if (isTexture(*GVar)) {
1018  O << ".global .texref " << getTextureName(*GVar) << ";\n";
1019  return;
1020  }
1021 
1022  if (isSurface(*GVar)) {
1023  O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1024  return;
1025  }
1026 
1027  if (GVar->isDeclaration()) {
1028  // (extern) declarations, no definition or initializer
1029  // Currently the only known declaration is for an automatic __local
1030  // (.shared) promoted to global.
1031  emitPTXGlobalVariable(GVar, O);
1032  O << ";\n";
1033  return;
1034  }
1035 
1036  if (isSampler(*GVar)) {
1037  O << ".global .samplerref " << getSamplerName(*GVar);
1038 
1039  const Constant *Initializer = nullptr;
1040  if (GVar->hasInitializer())
1041  Initializer = GVar->getInitializer();
1042  const ConstantInt *CI = nullptr;
1043  if (Initializer)
1044  CI = dyn_cast<ConstantInt>(Initializer);
1045  if (CI) {
1046  unsigned sample = CI->getZExtValue();
1047 
1048  O << " = { ";
1049 
1050  for (int i = 0,
1051  addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1052  i < 3; i++) {
1053  O << "addr_mode_" << i << " = ";
1054  switch (addr) {
1055  case 0:
1056  O << "wrap";
1057  break;
1058  case 1:
1059  O << "clamp_to_border";
1060  break;
1061  case 2:
1062  O << "clamp_to_edge";
1063  break;
1064  case 3:
1065  O << "wrap";
1066  break;
1067  case 4:
1068  O << "mirror";
1069  break;
1070  }
1071  O << ", ";
1072  }
1073  O << "filter_mode = ";
1074  switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1075  case 0:
1076  O << "nearest";
1077  break;
1078  case 1:
1079  O << "linear";
1080  break;
1081  case 2:
1082  llvm_unreachable("Anisotropic filtering is not supported");
1083  default:
1084  O << "nearest";
1085  break;
1086  }
1087  if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1088  O << ", force_unnormalized_coords = 1";
1089  }
1090  O << " }";
1091  }
1092 
1093  O << ";\n";
1094  return;
1095  }
1096 
1097  if (GVar->hasPrivateLinkage()) {
1098  if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1099  return;
1100 
1101  // FIXME - need better way (e.g. Metadata) to avoid generating this global
1102  if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1103  return;
1104  if (GVar->use_empty())
1105  return;
1106  }
1107 
1108  const Function *demotedFunc = nullptr;
1109  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1110  O << "// " << GVar->getName() << " has been demoted\n";
1111  if (localDecls.find(demotedFunc) != localDecls.end())
1112  localDecls[demotedFunc].push_back(GVar);
1113  else {
1114  std::vector<const GlobalVariable *> temp;
1115  temp.push_back(GVar);
1116  localDecls[demotedFunc] = temp;
1117  }
1118  return;
1119  }
1120 
1121  O << ".";
1122  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1123 
1124  if (isManaged(*GVar)) {
1125  O << " .attribute(.managed)";
1126  }
1127 
1128  if (GVar->getAlignment() == 0)
1129  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1130  else
1131  O << " .align " << GVar->getAlignment();
1132 
1133  if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1134  (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1135  O << " .";
1136  // Special case: ABI requires that we use .u8 for predicates
1137  if (ETy->isIntegerTy(1))
1138  O << "u8";
1139  else
1140  O << getPTXFundamentalTypeStr(ETy, false);
1141  O << " ";
1142  getSymbol(GVar)->print(O, MAI);
1143 
1144  // Ptx allows variable initialization only for constant and global state
1145  // spaces.
1146  if (GVar->hasInitializer()) {
1147  if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1148  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1149  const Constant *Initializer = GVar->getInitializer();
1150  // 'undef' is treated as there is no value specified.
1151  if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1152  O << " = ";
1153  printScalarConstant(Initializer, O);
1154  }
1155  } else {
1156  // The frontend adds zero-initializer to device and constant variables
1157  // that don't have an initial value, and UndefValue to shared
1158  // variables, so skip warning for this case.
1159  if (!GVar->getInitializer()->isNullValue() &&
1160  !isa<UndefValue>(GVar->getInitializer())) {
1161  report_fatal_error("initial value of '" + GVar->getName() +
1162  "' is not allowed in addrspace(" +
1163  Twine(PTy->getAddressSpace()) + ")");
1164  }
1165  }
1166  }
1167  } else {
1168  unsigned int ElementSize = 0;
1169 
1170  // Although PTX has direct support for struct type and array type and
1171  // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1172  // targets that support these high level field accesses. Structs, arrays
1173  // and vectors are lowered into arrays of bytes.
1174  switch (ETy->getTypeID()) {
1175  case Type::IntegerTyID: // Integers larger than 64 bits
1176  case Type::StructTyID:
1177  case Type::ArrayTyID:
1178  case Type::FixedVectorTyID:
1179  ElementSize = DL.getTypeStoreSize(ETy);
1180  // Ptx allows variable initialization only for constant and
1181  // global state spaces.
1182  if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1183  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1184  GVar->hasInitializer()) {
1185  const Constant *Initializer = GVar->getInitializer();
1186  if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1187  AggBuffer aggBuffer(ElementSize, O, *this);
1188  bufferAggregateConstant(Initializer, &aggBuffer);
1189  if (aggBuffer.numSymbols) {
1190  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
1191  O << " .u64 ";
1192  getSymbol(GVar)->print(O, MAI);
1193  O << "[";
1194  O << ElementSize / 8;
1195  } else {
1196  O << " .u32 ";
1197  getSymbol(GVar)->print(O, MAI);
1198  O << "[";
1199  O << ElementSize / 4;
1200  }
1201  O << "]";
1202  } else {
1203  O << " .b8 ";
1204  getSymbol(GVar)->print(O, MAI);
1205  O << "[";
1206  O << ElementSize;
1207  O << "]";
1208  }
1209  O << " = {";
1210  aggBuffer.print();
1211  O << "}";
1212  } else {
1213  O << " .b8 ";
1214  getSymbol(GVar)->print(O, MAI);
1215  if (ElementSize) {
1216  O << "[";
1217  O << ElementSize;
1218  O << "]";
1219  }
1220  }
1221  } else {
1222  O << " .b8 ";
1223  getSymbol(GVar)->print(O, MAI);
1224  if (ElementSize) {
1225  O << "[";
1226  O << ElementSize;
1227  O << "]";
1228  }
1229  }
1230  break;
1231  default:
1232  llvm_unreachable("type not supported yet");
1233  }
1234  }
1235  O << ";\n";
1236 }
1237 
1238 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1239  if (localDecls.find(f) == localDecls.end())
1240  return;
1241 
1242  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1243 
1244  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
1245  O << "\t// demoted variable\n\t";
1246  printModuleLevelGV(gvars[i], O, true);
1247  }
1248 }
1249 
1250 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1251  raw_ostream &O) const {
1252  switch (AddressSpace) {
1253  case ADDRESS_SPACE_LOCAL:
1254  O << "local";
1255  break;
1256  case ADDRESS_SPACE_GLOBAL:
1257  O << "global";
1258  break;
1259  case ADDRESS_SPACE_CONST:
1260  O << "const";
1261  break;
1262  case ADDRESS_SPACE_SHARED:
1263  O << "shared";
1264  break;
1265  default:
1266  report_fatal_error("Bad address space found while emitting PTX: " +
1268  break;
1269  }
1270 }
1271 
1272 std::string
1273 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1274  switch (Ty->getTypeID()) {
1275  case Type::IntegerTyID: {
1276  unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1277  if (NumBits == 1)
1278  return "pred";
1279  else if (NumBits <= 64) {
1280  std::string name = "u";
1281  return name + utostr(NumBits);
1282  } else {
1283  llvm_unreachable("Integer too large");
1284  break;
1285  }
1286  break;
1287  }
1288  case Type::HalfTyID:
1289  // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1290  return "b16";
1291  case Type::FloatTyID:
1292  return "f32";
1293  case Type::DoubleTyID:
1294  return "f64";
1295  case Type::PointerTyID:
1296  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1297  if (useB4PTR)
1298  return "b64";
1299  else
1300  return "u64";
1301  else if (useB4PTR)
1302  return "b32";
1303  else
1304  return "u32";
1305  default:
1306  break;
1307  }
1308  llvm_unreachable("unexpected type");
1309 }
1310 
1311 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1312  raw_ostream &O) {
1313  const DataLayout &DL = getDataLayout();
1314 
1315  // GlobalVariables are always constant pointers themselves.
1316  Type *ETy = GVar->getValueType();
1317 
1318  O << ".";
1319  emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1320  if (GVar->getAlignment() == 0)
1321  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1322  else
1323  O << " .align " << GVar->getAlignment();
1324 
1325  // Special case for i128
1326  if (ETy->isIntegerTy(128)) {
1327  O << " .b8 ";
1328  getSymbol(GVar)->print(O, MAI);
1329  O << "[16]";
1330  return;
1331  }
1332 
1333  if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1334  O << " .";
1335  O << getPTXFundamentalTypeStr(ETy);
1336  O << " ";
1337  getSymbol(GVar)->print(O, MAI);
1338  return;
1339  }
1340 
1341  int64_t ElementSize = 0;
1342 
1343  // Although PTX has direct support for struct type and array type and LLVM IR
1344  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1345  // support these high level field accesses. Structs and arrays are lowered
1346  // into arrays of bytes.
1347  switch (ETy->getTypeID()) {
1348  case Type::StructTyID:
1349  case Type::ArrayTyID:
1350  case Type::FixedVectorTyID:
1351  ElementSize = DL.getTypeStoreSize(ETy);
1352  O << " .b8 ";
1353  getSymbol(GVar)->print(O, MAI);
1354  O << "[";
1355  if (ElementSize) {
1356  O << ElementSize;
1357  }
1358  O << "]";
1359  break;
1360  default:
1361  llvm_unreachable("type not supported yet");
1362  }
1363 }
1364 
1365 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
1366  if (Ty->isSingleValueType())
1367  return DL.getPrefTypeAlignment(Ty);
1368 
1369  auto *ATy = dyn_cast<ArrayType>(Ty);
1370  if (ATy)
1371  return getOpenCLAlignment(DL, ATy->getElementType());
1372 
1373  auto *STy = dyn_cast<StructType>(Ty);
1374  if (STy) {
1375  unsigned int alignStruct = 1;
1376  // Go through each element of the struct and find the
1377  // largest alignment.
1378  for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1379  Type *ETy = STy->getElementType(i);
1380  unsigned int align = getOpenCLAlignment(DL, ETy);
1381  if (align > alignStruct)
1382  alignStruct = align;
1383  }
1384  return alignStruct;
1385  }
1386 
1387  auto *FTy = dyn_cast<FunctionType>(Ty);
1388  if (FTy)
1389  return DL.getPointerPrefAlignment().value();
1390  return DL.getPrefTypeAlignment(Ty);
1391 }
1392 
1393 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1394  int paramIndex, raw_ostream &O) {
1395  getSymbol(I->getParent())->print(O, MAI);
1396  O << "_param_" << paramIndex;
1397 }
1398 
1399 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1400  const DataLayout &DL = getDataLayout();
1401  const AttributeList &PAL = F->getAttributes();
1402  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1403  const TargetLowering *TLI = STI.getTargetLowering();
1405  unsigned paramIndex = 0;
1406  bool first = true;
1407  bool isKernelFunc = isKernelFunction(*F);
1408  bool isABI = (STI.getSmVersion() >= 20);
1409  bool hasImageHandles = STI.hasImageHandles();
1410  MVT thePointerTy = TLI->getPointerTy(DL);
1411 
1412  if (F->arg_empty()) {
1413  O << "()\n";
1414  return;
1415  }
1416 
1417  O << "(\n";
1418 
1419  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1420  Type *Ty = I->getType();
1421 
1422  if (!first)
1423  O << ",\n";
1424 
1425  first = false;
1426 
1427  // Handle image/sampler parameters
1428  if (isKernelFunction(*F)) {
1429  if (isSampler(*I) || isImage(*I)) {
1430  if (isImage(*I)) {
1431  std::string sname = std::string(I->getName());
1432  if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1433  if (hasImageHandles)
1434  O << "\t.param .u64 .ptr .surfref ";
1435  else
1436  O << "\t.param .surfref ";
1437  CurrentFnSym->print(O, MAI);
1438  O << "_param_" << paramIndex;
1439  }
1440  else { // Default image is read_only
1441  if (hasImageHandles)
1442  O << "\t.param .u64 .ptr .texref ";
1443  else
1444  O << "\t.param .texref ";
1445  CurrentFnSym->print(O, MAI);
1446  O << "_param_" << paramIndex;
1447  }
1448  } else {
1449  if (hasImageHandles)
1450  O << "\t.param .u64 .ptr .samplerref ";
1451  else
1452  O << "\t.param .samplerref ";
1453  CurrentFnSym->print(O, MAI);
1454  O << "_param_" << paramIndex;
1455  }
1456  continue;
1457  }
1458  }
1459 
1460  if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
1461  if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1462  // Just print .param .align <a> .b8 .param[size];
1463  // <a> = PAL.getparamalignment
1464  // size = typeallocsize of element type
1465  const Align align = DL.getValueOrABITypeAlignment(
1466  PAL.getParamAlignment(paramIndex), Ty);
1467 
1468  unsigned sz = DL.getTypeAllocSize(Ty);
1469  O << "\t.param .align " << align.value() << " .b8 ";
1470  printParamName(I, paramIndex, O);
1471  O << "[" << sz << "]";
1472 
1473  continue;
1474  }
1475  // Just a scalar
1476  auto *PTy = dyn_cast<PointerType>(Ty);
1477  if (isKernelFunc) {
1478  if (PTy) {
1479  // Special handling for pointer arguments to kernel
1480  O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1481 
1482  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1483  NVPTX::CUDA) {
1484  Type *ETy = PTy->getElementType();
1485  int addrSpace = PTy->getAddressSpace();
1486  switch (addrSpace) {
1487  default:
1488  O << ".ptr ";
1489  break;
1490  case ADDRESS_SPACE_CONST:
1491  O << ".ptr .const ";
1492  break;
1493  case ADDRESS_SPACE_SHARED:
1494  O << ".ptr .shared ";
1495  break;
1496  case ADDRESS_SPACE_GLOBAL:
1497  O << ".ptr .global ";
1498  break;
1499  }
1500  O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
1501  }
1502  printParamName(I, paramIndex, O);
1503  continue;
1504  }
1505 
1506  // non-pointer scalar to kernel func
1507  O << "\t.param .";
1508  // Special case: predicate operands become .u8 types
1509  if (Ty->isIntegerTy(1))
1510  O << "u8";
1511  else
1512  O << getPTXFundamentalTypeStr(Ty);
1513  O << " ";
1514  printParamName(I, paramIndex, O);
1515  continue;
1516  }
1517  // Non-kernel function, just print .param .b<size> for ABI
1518  // and .reg .b<size> for non-ABI
1519  unsigned sz = 0;
1520  if (isa<IntegerType>(Ty)) {
1521  sz = cast<IntegerType>(Ty)->getBitWidth();
1522  if (sz < 32)
1523  sz = 32;
1524  } else if (isa<PointerType>(Ty))
1525  sz = thePointerTy.getSizeInBits();
1526  else if (Ty->isHalfTy())
1527  // PTX ABI requires all scalar parameters to be at least 32
1528  // bits in size. fp16 normally uses .b16 as its storage type
1529  // in PTX, so its size must be adjusted here, too.
1530  sz = 32;
1531  else
1532  sz = Ty->getPrimitiveSizeInBits();
1533  if (isABI)
1534  O << "\t.param .b" << sz << " ";
1535  else
1536  O << "\t.reg .b" << sz << " ";
1537  printParamName(I, paramIndex, O);
1538  continue;
1539  }
1540 
1541  // param has byVal attribute. So should be a pointer
1542  auto *PTy = dyn_cast<PointerType>(Ty);
1543  assert(PTy && "Param with byval attribute should be a pointer type");
1544  Type *ETy = PTy->getElementType();
1545 
1546  if (isABI || isKernelFunc) {
1547  // Just print .param .align <a> .b8 .param[size];
1548  // <a> = PAL.getparamalignment
1549  // size = typeallocsize of element type
1550  Align align =
1551  DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy);
1552  // Work around a bug in ptxas. When PTX code takes address of
1553  // byval parameter with alignment < 4, ptxas generates code to
1554  // spill argument into memory. Alas on sm_50+ ptxas generates
1555  // SASS code that fails with misaligned access. To work around
1556  // the problem, make sure that we align byval parameters by at
1557  // least 4. Matching change must be made in LowerCall() where we
1558  // prepare parameters for the call.
1559  //
1560  // TODO: this will need to be undone when we get to support multi-TU
1561  // device-side compilation as it breaks ABI compatibility with nvcc.
1562  // Hopefully ptxas bug is fixed by then.
1563  if (!isKernelFunc && align < Align(4))
1564  align = Align(4);
1565  unsigned sz = DL.getTypeAllocSize(ETy);
1566  O << "\t.param .align " << align.value() << " .b8 ";
1567  printParamName(I, paramIndex, O);
1568  O << "[" << sz << "]";
1569  continue;
1570  } else {
1571  // Split the ETy into constituent parts and
1572  // print .param .b<size> <name> for each part.
1573  // Further, if a part is vector, print the above for
1574  // each vector element.
1575  SmallVector<EVT, 16> vtparts;
1576  ComputeValueVTs(*TLI, DL, ETy, vtparts);
1577  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1578  unsigned elems = 1;
1579  EVT elemtype = vtparts[i];
1580  if (vtparts[i].isVector()) {
1581  elems = vtparts[i].getVectorNumElements();
1582  elemtype = vtparts[i].getVectorElementType();
1583  }
1584 
1585  for (unsigned j = 0, je = elems; j != je; ++j) {
1586  unsigned sz = elemtype.getSizeInBits();
1587  if (elemtype.isInteger() && (sz < 32))
1588  sz = 32;
1589  O << "\t.reg .b" << sz << " ";
1590  printParamName(I, paramIndex, O);
1591  if (j < je - 1)
1592  O << ",\n";
1593  ++paramIndex;
1594  }
1595  if (i < e - 1)
1596  O << ",\n";
1597  }
1598  --paramIndex;
1599  continue;
1600  }
1601  }
1602 
1603  O << "\n)\n";
1604 }
1605 
1606 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1607  raw_ostream &O) {
1608  const Function &F = MF.getFunction();
1609  emitFunctionParamList(&F, O);
1610 }
1611 
1612 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1613  const MachineFunction &MF) {
1614  SmallString<128> Str;
1615  raw_svector_ostream O(Str);
1616 
1617  // Map the global virtual register number to a register class specific
1618  // virtual register number starting from 1 with that class.
1620  //unsigned numRegClasses = TRI->getNumRegClasses();
1621 
1622  // Emit the Fake Stack Object
1623  const MachineFrameInfo &MFI = MF.getFrameInfo();
1624  int NumBytes = (int) MFI.getStackSize();
1625  if (NumBytes) {
1626  O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1627  << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1628  if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1629  O << "\t.reg .b64 \t%SP;\n";
1630  O << "\t.reg .b64 \t%SPL;\n";
1631  } else {
1632  O << "\t.reg .b32 \t%SP;\n";
1633  O << "\t.reg .b32 \t%SPL;\n";
1634  }
1635  }
1636 
1637  // Go through all virtual registers to establish the mapping between the
1638  // global virtual
1639  // register number and the per class virtual register number.
1640  // We use the per class virtual register number in the ptx output.
1641  unsigned int numVRs = MRI->getNumVirtRegs();
1642  for (unsigned i = 0; i < numVRs; i++) {
1643  unsigned int vr = Register::index2VirtReg(i);
1644  const TargetRegisterClass *RC = MRI->getRegClass(vr);
1645  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1646  int n = regmap.size();
1647  regmap.insert(std::make_pair(vr, n + 1));
1648  }
1649 
1650  // Emit register declarations
1651  // @TODO: Extract out the real register usage
1652  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1653  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1654  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1655  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1656  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1657  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1658  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1659 
1660  // Emit declaration of the virtual registers or 'physical' registers for
1661  // each register class
1662  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1663  const TargetRegisterClass *RC = TRI->getRegClass(i);
1664  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1665  std::string rcname = getNVPTXRegClassName(RC);
1666  std::string rcStr = getNVPTXRegClassStr(RC);
1667  int n = regmap.size();
1668 
1669  // Only declare those registers that may be used.
1670  if (n) {
1671  O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1672  << ">;\n";
1673  }
1674  }
1675 
1676  OutStreamer->emitRawText(O.str());
1677 }
1678 
1679 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1680  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1681  bool ignored;
1682  unsigned int numHex;
1683  const char *lead;
1684 
1685  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1686  numHex = 8;
1687  lead = "0f";
1689  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1690  numHex = 16;
1691  lead = "0d";
1693  } else
1694  llvm_unreachable("unsupported fp type");
1695 
1696  APInt API = APF.bitcastToAPInt();
1697  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1698 }
1699 
1700 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1701  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1702  O << CI->getValue();
1703  return;
1704  }
1705  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1706  printFPConstant(CFP, O);
1707  return;
1708  }
1709  if (isa<ConstantPointerNull>(CPV)) {
1710  O << "0";
1711  return;
1712  }
1713  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1714  bool IsNonGenericPointer = false;
1715  if (GVar->getType()->getAddressSpace() != 0) {
1716  IsNonGenericPointer = true;
1717  }
1718  if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1719  O << "generic(";
1720  getSymbol(GVar)->print(O, MAI);
1721  O << ")";
1722  } else {
1723  getSymbol(GVar)->print(O, MAI);
1724  }
1725  return;
1726  }
1727  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1728  const Value *v = Cexpr->stripPointerCasts();
1729  PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1730  bool IsNonGenericPointer = false;
1731  if (PTy && PTy->getAddressSpace() != 0) {
1732  IsNonGenericPointer = true;
1733  }
1734  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1735  if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1736  O << "generic(";
1737  getSymbol(GVar)->print(O, MAI);
1738  O << ")";
1739  } else {
1740  getSymbol(GVar)->print(O, MAI);
1741  }
1742  return;
1743  } else {
1744  lowerConstant(CPV)->print(O, MAI);
1745  return;
1746  }
1747  }
1748  llvm_unreachable("Not scalar type found in printScalarConstant()");
1749 }
1750 
// These utility functions assure we get the right sequence of bytes for a given
// type even for big-endian machines

/// Store the sizeof(T) bytes of the integer \p val into \p p, least
/// significant byte first. \p p must have room for sizeof(T) bytes.
template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
  // Shift an unsigned copy: right-shifting a negative signed value is
  // implementation-defined, while conversion to uint64_t is always modular
  // and keeps the low sizeof(T) bytes intact.
  uint64_t Bits = static_cast<uint64_t>(val);
  for (unsigned i = 0; i < sizeof(T); ++i) {
    p[i] = static_cast<unsigned char>(Bits & 0xFF);
    Bits >>= 8;
  }
}
/// Store the 4-byte object representation of \p val into \p p, least
/// significant byte first regardless of host byte order. \p p must have room
/// for 4 bytes.
static void ConvertFloatToBytes(unsigned char *p, float val) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float is expected to be 32 bits wide");
  // Copy the bits instead of casting the pointer: reading a float through an
  // int32_t lvalue violates the strict-aliasing rule.
  uint32_t Bits;
  std::memcpy(&Bits, &val, sizeof(Bits));
  for (unsigned i = 0; i < sizeof(Bits); ++i) {
    p[i] = static_cast<unsigned char>(Bits & 0xFF);
    Bits >>= 8;
  }
}
/// Store the 8-byte object representation of \p val into \p p, least
/// significant byte first regardless of host byte order. \p p must have room
/// for 8 bytes.
static void ConvertDoubleToBytes(unsigned char *p, double val) {
  static_assert(sizeof(double) == sizeof(uint64_t),
                "double is expected to be 64 bits wide");
  // Copy the bits instead of casting the pointer: reading a double through an
  // int64_t lvalue violates the strict-aliasing rule.
  uint64_t Bits;
  std::memcpy(&Bits, &val, sizeof(Bits));
  for (unsigned i = 0; i < sizeof(Bits); ++i) {
    p[i] = static_cast<unsigned char>(Bits & 0xFF);
    Bits >>= 8;
  }
}
1774 
1775 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1776  AggBuffer *aggBuffer) {
1777  const DataLayout &DL = getDataLayout();
1778 
1779  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1780  int s = DL.getTypeAllocSize(CPV->getType());
1781  if (s < Bytes)
1782  s = Bytes;
1783  aggBuffer->addZeros(s);
1784  return;
1785  }
1786 
1787  unsigned char ptr[8];
1788  switch (CPV->getType()->getTypeID()) {
1789 
1790  case Type::IntegerTyID: {
1791  Type *ETy = CPV->getType();
1792  if (ETy == Type::getInt8Ty(CPV->getContext())) {
1793  unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
1794  ConvertIntToBytes<>(ptr, c);
1795  aggBuffer->addBytes(ptr, 1, Bytes);
1796  } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
1797  short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
1798  ConvertIntToBytes<>(ptr, int16);
1799  aggBuffer->addBytes(ptr, 2, Bytes);
1800  } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
1801  if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1802  int int32 = (int)(constInt->getZExtValue());
1803  ConvertIntToBytes<>(ptr, int32);
1804  aggBuffer->addBytes(ptr, 4, Bytes);
1805  break;
1806  } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1807  if (const auto *constInt = dyn_cast<ConstantInt>(
1808  ConstantFoldConstant(Cexpr, DL))) {
1809  int int32 = (int)(constInt->getZExtValue());
1810  ConvertIntToBytes<>(ptr, int32);
1811  aggBuffer->addBytes(ptr, 4, Bytes);
1812  break;
1813  }
1814  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1815  Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1816  aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1817  aggBuffer->addZeros(4);
1818  break;
1819  }
1820  }
1821  llvm_unreachable("unsupported integer const type");
1822  } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
1823  if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1824  long long int64 = (long long)(constInt->getZExtValue());
1825  ConvertIntToBytes<>(ptr, int64);
1826  aggBuffer->addBytes(ptr, 8, Bytes);
1827  break;
1828  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1829  if (const auto *constInt = dyn_cast<ConstantInt>(
1830  ConstantFoldConstant(Cexpr, DL))) {
1831  long long int64 = (long long)(constInt->getZExtValue());
1832  ConvertIntToBytes<>(ptr, int64);
1833  aggBuffer->addBytes(ptr, 8, Bytes);
1834  break;
1835  }
1836  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1837  Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1838  aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1839  aggBuffer->addZeros(8);
1840  break;
1841  }
1842  }
1843  llvm_unreachable("unsupported integer const type");
1844  } else
1845  llvm_unreachable("unsupported integer const type");
1846  break;
1847  }
1848  case Type::HalfTyID:
1849  case Type::FloatTyID:
1850  case Type::DoubleTyID: {
1851  const auto *CFP = cast<ConstantFP>(CPV);
1852  Type *Ty = CFP->getType();
1853  if (Ty == Type::getHalfTy(CPV->getContext())) {
1854  APInt API = CFP->getValueAPF().bitcastToAPInt();
1855  uint16_t float16 = API.getLoBits(16).getZExtValue();
1856  ConvertIntToBytes<>(ptr, float16);
1857  aggBuffer->addBytes(ptr, 2, Bytes);
1858  } else if (Ty == Type::getFloatTy(CPV->getContext())) {
1859  float float32 = (float) CFP->getValueAPF().convertToFloat();
1860  ConvertFloatToBytes(ptr, float32);
1861  aggBuffer->addBytes(ptr, 4, Bytes);
1862  } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
1863  double float64 = CFP->getValueAPF().convertToDouble();
1864  ConvertDoubleToBytes(ptr, float64);
1865  aggBuffer->addBytes(ptr, 8, Bytes);
1866  } else {
1867  llvm_unreachable("unsupported fp const type");
1868  }
1869  break;
1870  }
1871  case Type::PointerTyID: {
1872  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1873  aggBuffer->addSymbol(GVar, GVar);
1874  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1875  const Value *v = Cexpr->stripPointerCasts();
1876  aggBuffer->addSymbol(v, Cexpr);
1877  }
1878  unsigned int s = DL.getTypeAllocSize(CPV->getType());
1879  aggBuffer->addZeros(s);
1880  break;
1881  }
1882 
1883  case Type::ArrayTyID:
1884  case Type::FixedVectorTyID:
1885  case Type::StructTyID: {
1886  if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1887  int ElementSize = DL.getTypeAllocSize(CPV->getType());
1888  bufferAggregateConstant(CPV, aggBuffer);
1889  if (Bytes > ElementSize)
1890  aggBuffer->addZeros(Bytes - ElementSize);
1891  } else if (isa<ConstantAggregateZero>(CPV))
1892  aggBuffer->addZeros(Bytes);
1893  else
1894  llvm_unreachable("Unexpected Constant type");
1895  break;
1896  }
1897 
1898  default:
1899  llvm_unreachable("unsupported type");
1900  }
1901 }
1902 
1903 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1904  AggBuffer *aggBuffer) {
1905  const DataLayout &DL = getDataLayout();
1906  int Bytes;
1907 
1908  // Integers of arbitrary width
1909  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1910  APInt Val = CI->getValue();
1911  for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1912  uint8_t Byte = Val.getLoBits(8).getZExtValue();
1913  aggBuffer->addBytes(&Byte, 1, 1);
1914  Val.lshrInPlace(8);
1915  }
1916  return;
1917  }
1918 
1919  // Old constants
1920  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1921  if (CPV->getNumOperands())
1922  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1923  bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1924  return;
1925  }
1926 
1927  if (const ConstantDataSequential *CDS =
1928  dyn_cast<ConstantDataSequential>(CPV)) {
1929  if (CDS->getNumElements())
1930  for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1931  bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1932  aggBuffer);
1933  return;
1934  }
1935 
1936  if (isa<ConstantStruct>(CPV)) {
1937  if (CPV->getNumOperands()) {
1938  StructType *ST = cast<StructType>(CPV->getType());
1939  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1940  if (i == (e - 1))
1941  Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1942  DL.getTypeAllocSize(ST) -
1943  DL.getStructLayout(ST)->getElementOffset(i);
1944  else
1945  Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1946  DL.getStructLayout(ST)->getElementOffset(i);
1947  bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1948  }
1949  }
1950  return;
1951  }
1952  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1953 }
1954 
1955 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1956 /// a copy from AsmPrinter::lowerConstant, except customized to only handle
1957 /// expressions that are representable in PTX and create
1958 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1959 const MCExpr *
1960 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1961  MCContext &Ctx = OutContext;
1962 
1963  if (CV->isNullValue() || isa<UndefValue>(CV))
1964  return MCConstantExpr::create(0, Ctx);
1965 
1966  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1967  return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1968 
1969  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1970  const MCSymbolRefExpr *Expr =
1972  if (ProcessingGeneric) {
1973  return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1974  } else {
1975  return Expr;
1976  }
1977  }
1978 
1979  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1980  if (!CE) {
1981  llvm_unreachable("Unknown constant value to lower!");
1982  }
1983 
1984  switch (CE->getOpcode()) {
1985  default: {
1986  // If the code isn't optimized, there may be outstanding folding
1987  // opportunities. Attempt to fold the expression using DataLayout as a
1988  // last resort before giving up.
1990  if (C != CE)
1991  return lowerConstantForGV(C, ProcessingGeneric);
1992 
1993  // Otherwise report the problem to the user.
1994  std::string S;
1995  raw_string_ostream OS(S);
1996  OS << "Unsupported expression in static initializer: ";
1997  CE->printAsOperand(OS, /*PrintType=*/false,
1998  !MF ? nullptr : MF->getFunction().getParent());
1999  report_fatal_error(OS.str());
2000  }
2001 
2002  case Instruction::AddrSpaceCast: {
2003  // Strip the addrspacecast and pass along the operand
2004  PointerType *DstTy = cast<PointerType>(CE->getType());
2005  if (DstTy->getAddressSpace() == 0) {
2006  return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2007  }
2008  std::string S;
2009  raw_string_ostream OS(S);
2010  OS << "Unsupported expression in static initializer: ";
2011  CE->printAsOperand(OS, /*PrintType=*/ false,
2012  !MF ? nullptr : MF->getFunction().getParent());
2013  report_fatal_error(OS.str());
2014  }
2015 
2016  case Instruction::GetElementPtr: {
2017  const DataLayout &DL = getDataLayout();
2018 
2019  // Generate a symbolic expression for the byte address
2020  APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2021  cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2022 
2023  const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2024  ProcessingGeneric);
2025  if (!OffsetAI)
2026  return Base;
2027 
2028  int64_t Offset = OffsetAI.getSExtValue();
2030  Ctx);
2031  }
2032 
2033  case Instruction::Trunc:
2034  // We emit the value and depend on the assembler to truncate the generated
2035  // expression properly. This is important for differences between
2036  // blockaddress labels. Since the two labels are in the same function, it
2037  // is reasonable to treat their delta as a 32-bit value.
2039  case Instruction::BitCast:
2040  return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2041 
2042  case Instruction::IntToPtr: {
2043  const DataLayout &DL = getDataLayout();
2044 
2045  // Handle casts to pointers by changing them into casts to the appropriate
2046  // integer type. This promotes constant folding and simplifies this code.
2047  Constant *Op = CE->getOperand(0);
2048  Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2049  false/*ZExt*/);
2050  return lowerConstantForGV(Op, ProcessingGeneric);
2051  }
2052 
2053  case Instruction::PtrToInt: {
2054  const DataLayout &DL = getDataLayout();
2055 
2056  // Support only foldable casts to/from pointers that can be eliminated by
2057  // changing the pointer to the appropriately sized integer type.
2058  Constant *Op = CE->getOperand(0);
2059  Type *Ty = CE->getType();
2060 
2061  const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2062 
2063  // We can emit the pointer value into this slot if the slot is an
2064  // integer slot equal to the size of the pointer.
2065  if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2066  return OpExpr;
2067 
2068  // Otherwise the pointer is smaller than the resultant integer, mask off
2069  // the high bits so we are sure to get a proper truncation if the input is
2070  // a constant expr.
2071  unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2072  const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2073  return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2074  }
2075 
2076  // The MC library also has a right-shift operator, but it isn't consistently
2077  // signed or unsigned between different targets.
2078  case Instruction::Add: {
2079  const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2080  const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2081  switch (CE->getOpcode()) {
2082  default: llvm_unreachable("Unknown binary operator constant cast expr");
2083  case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2084  }
2085  }
2086  }
2087 }
2088 
2089 // Copy of MCExpr::print customized for NVPTX
2090 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2091  switch (Expr.getKind()) {
2092  case MCExpr::Target:
2093  return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2094  case MCExpr::Constant:
2095  OS << cast<MCConstantExpr>(Expr).getValue();
2096  return;
2097 
2098  case MCExpr::SymbolRef: {
2099  const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2100  const MCSymbol &Sym = SRE.getSymbol();
2101  Sym.print(OS, MAI);
2102  return;
2103  }
2104 
2105  case MCExpr::Unary: {
2106  const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2107  switch (UE.getOpcode()) {
2108  case MCUnaryExpr::LNot: OS << '!'; break;
2109  case MCUnaryExpr::Minus: OS << '-'; break;
2110  case MCUnaryExpr::Not: OS << '~'; break;
2111  case MCUnaryExpr::Plus: OS << '+'; break;
2112  }
2113  printMCExpr(*UE.getSubExpr(), OS);
2114  return;
2115  }
2116 
2117  case MCExpr::Binary: {
2118  const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2119 
2120  // Only print parens around the LHS if it is non-trivial.
2121  if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2122  isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2123  printMCExpr(*BE.getLHS(), OS);
2124  } else {
2125  OS << '(';
2126  printMCExpr(*BE.getLHS(), OS);
2127  OS<< ')';
2128  }
2129 
2130  switch (BE.getOpcode()) {
2131  case MCBinaryExpr::Add:
2132  // Print "X-42" instead of "X+-42".
2133  if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2134  if (RHSC->getValue() < 0) {
2135  OS << RHSC->getValue();
2136  return;
2137  }
2138  }
2139 
2140  OS << '+';
2141  break;
2142  default: llvm_unreachable("Unhandled binary operator");
2143  }
2144 
2145  // Only print parens around the LHS if it is non-trivial.
2146  if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2147  printMCExpr(*BE.getRHS(), OS);
2148  } else {
2149  OS << '(';
2150  printMCExpr(*BE.getRHS(), OS);
2151  OS << ')';
2152  }
2153  return;
2154  }
2155  }
2156 
2157  llvm_unreachable("Invalid expression kind!");
2158 }
2159 
2160 /// PrintAsmOperand - Print out an operand for an inline asm expression.
2161 ///
2162 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2163  const char *ExtraCode, raw_ostream &O) {
2164  if (ExtraCode && ExtraCode[0]) {
2165  if (ExtraCode[1] != 0)
2166  return true; // Unknown modifier.
2167 
2168  switch (ExtraCode[0]) {
2169  default:
2170  // See if this is a generic print operand
2171  return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2172  case 'r':
2173  break;
2174  }
2175  }
2176 
2177  printOperand(MI, OpNo, O);
2178 
2179  return false;
2180 }
2181 
2182 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2183  unsigned OpNo,
2184  const char *ExtraCode,
2185  raw_ostream &O) {
2186  if (ExtraCode && ExtraCode[0])
2187  return true; // Unknown modifier
2188 
2189  O << '[';
2190  printMemOperand(MI, OpNo, O);
2191  O << ']';
2192 
2193  return false;
2194 }
2195 
2196 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2197  raw_ostream &O) {
2198  const MachineOperand &MO = MI->getOperand(opNum);
2199  switch (MO.getType()) {
2202  if (MO.getReg() == NVPTX::VRDepot)
2203  O << DEPOTNAME << getFunctionNumber();
2204  else
2206  } else {
2207  emitVirtualRegister(MO.getReg(), O);
2208  }
2209  break;
2210 
2212  O << MO.getImm();
2213  break;
2214 
2216  printFPConstant(MO.getFPImm(), O);
2217  break;
2218 
2220  PrintSymbolOperand(MO, O);
2221  break;
2222 
2224  MO.getMBB()->getSymbol()->print(O, MAI);
2225  break;
2226 
2227  default:
2228  llvm_unreachable("Operand type not supported.");
2229  }
2230 }
2231 
2232 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2233  raw_ostream &O, const char *Modifier) {
2234  printOperand(MI, opNum, O);
2235 
2236  if (Modifier && strcmp(Modifier, "add") == 0) {
2237  O << ", ";
2238  printOperand(MI, opNum + 1, O);
2239  } else {
2240  if (MI->getOperand(opNum + 1).isImm() &&
2241  MI->getOperand(opNum + 1).getImm() == 0)
2242  return; // don't print ',0' or '+0'
2243  O << "+";
2244  printOperand(MI, opNum + 1, O);
2245  }
2246 }
2247 
2248 // Force static initialization.
2252 }
llvm::NVPTXAsmPrinter::AggBuffer
friend class AggBuffer
Definition: NVPTXAsmPrinter.h:195
llvm::codeview::SimpleTypeKind::Byte
@ Byte
i
i
Definition: README.txt:29
llvm::NVPTXAsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: NVPTXAsmPrinter.cpp:881
NVPTXInstPrinter.h
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:487
llvm::Type::ArrayTyID
@ ArrayTyID
Arrays.
Definition: Type.h:76
ValueTypes.h
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm::StringRef::startswith
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:289
llvm::Type::FloatTyID
@ FloatTyID
32-bit floating point type
Definition: Type.h:59
llvm::getReqNTIDx
bool getReqNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:254
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:316
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
MachineInstr.h
llvm::MachineOperand::MO_Immediate
@ MO_Immediate
Immediate operand.
Definition: MachineOperand.h:53
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:60
llvm
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
ConvertIntToBytes
static void ConvertIntToBytes(unsigned char *p, T val)
Definition: NVPTXAsmPrinter.cpp:1753
llvm::GlobalValue::hasCommonLinkage
bool hasCommonLinkage() const
Definition: GlobalValue.h:449
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
__CLK_ADDRESS_BASE
@ __CLK_ADDRESS_BASE
Definition: cl_common_defines.h:69
llvm::MachineLoopInfo::getLoopFor
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
Definition: MachineLoopInfo.h:124
llvm::NVPTXInstPrinter::getRegisterName
static const char * getRegisterName(unsigned RegNo)
llvm::MCOperand::createExpr
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:161
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:202
llvm::MCUnaryExpr::getOpcode
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:464
llvm::AsmPrinter::lowerConstant
virtual const MCExpr * lowerConstant(const Constant *CV)
Lower the specified LLVM Constant to an MCExpr.
Definition: AsmPrinter.cpp:2386
llvm::GlobalValue::hasExternalLinkage
bool hasExternalLinkage() const
Definition: GlobalValue.h:431
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:229
llvm::APFloatBase::IEEEsingle
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:163
DebugInfoMetadata.h
MCInstrDesc.h
llvm::MachineOperand::getGlobal
const GlobalValue * getGlobal() const
Definition: MachineOperand.h:560
llvm::PointerType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:653
llvm::MCOperand::createImm
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:140
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:71
llvm::Function
Definition: Function.h:61
llvm::Type::VoidTyID
@ VoidTyID
type with no size
Definition: Type.h:64
StringRef.h
is64Bit
static bool is64Bit(const char *name)
Definition: X86Disassembler.cpp:1005
llvm::NVPTXSubtarget::getTargetLowering
const NVPTXTargetLowering * getTargetLowering() const override
Definition: NVPTXSubtarget.h:64
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:614
llvm::isImageReadWrite
bool isImageReadWrite(const Value &val)
Definition: NVPTXUtilities.cpp:200
llvm::NVPTXTargetMachine::is64Bit
bool is64Bit() const
Definition: NVPTXTargetMachine.h:46
llvm::GlobalObject::getSection
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:114
llvm::AsmPrinter::MAI
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:88
llvm::MCConstantExpr::create
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
llvm::GlobalValue::hasWeakLinkage
bool hasWeakLinkage() const
Definition: GlobalValue.h:439
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:131
usedInGlobalVarDef
static bool usedInGlobalVarDef(const Constant *C)
Definition: NVPTXAsmPrinter.cpp:616
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Path.h
llvm::ADDRESS_SPACE_LOCAL
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
llvm::Value::hasName
bool hasName() const
Definition: Value.h:262
ErrorHandling.h
llvm::NVPTXGenericMCSymbolRefExpr::create
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:54
DiscoverDependentGlobals
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
Definition: NVPTXAsmPrinter.cpp:99
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:662
usedInOneFunc
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
Definition: NVPTXAsmPrinter.cpp:632
llvm::MCUnaryExpr::Plus
@ Plus
Unary plus.
Definition: MCExpr.h:427
llvm::MCBinaryExpr::createAnd
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:526
llvm::MCBinaryExpr::Add
@ Add
Addition.
Definition: MCExpr.h:480
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
MachineBasicBlock.h
llvm::NVPTXSubtarget::getSmVersion
unsigned int getSmVersion() const
Definition: NVPTXSubtarget.h:80
llvm::AsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AsmPrinter.cpp:1637
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::MachineModuleInfo::hasDebugInfo
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition: MachineModuleInfo.h:203
APInt.h
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
llvm::getMinCTASm
bool getMinCTASm(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:266
llvm::DenseMapIterator
Definition: DenseMap.h:56
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::AsmPrinter::PrintSymbolOperand
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
Definition: AsmPrinterInlineAsm.cpp:590
DenseMap.h
Module.h
llvm::AttributeList
Definition: Attributes.h:375
llvm::NVPTXFloatMCExpr::createConstantFPDouble
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:53
printOperand
static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value)
Definition: SelectionDAGDumper.cpp:944
llvm::isImage
bool isImage(const Value &val)
Definition: NVPTXUtilities.cpp:212
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:295
NVPTXMCAsmInfo.h
T
#define T
Definition: Mips16ISelLowering.cpp:341
ret
to esp esp setne al movzbw ax esp setg cl movzbw cx cmove cx cl jne LBB1_2 esp ret(also really horrible code on ppc). This is due to the expand code for 64-bit compares. GCC produces multiple branches
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:757
Operator.h
llvm::Register::index2VirtReg
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:183
NVPTXTargetStreamer.h
llvm::MCBinaryExpr
Binary assembler expressions.
Definition: MCExpr.h:477
caller
int caller(int32 arg1, int32 arg2)
Definition: README.txt:681
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
llvm::format_hex_no_prefix
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:199
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:202
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::MCUnaryExpr
Unary assembler expressions.
Definition: MCExpr.h:421
NVPTXRegisterInfo.h
ConstantFolding.h
p
the resulting code requires compare and branches when and if * p
Definition: README.txt:396
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:204
llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:24
llvm::isTexture
bool isTexture(const Value &val)
Definition: NVPTXUtilities.cpp:133
llvm::MachineOperand::MO_Register
@ MO_Register
Register operand.
Definition: MachineOperand.h:52
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:92
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
NVPTX.h
MachineRegisterInfo.h
llvm::AsmPrinter::EmitToStreamer
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:241
llvm::isSampler
bool isSampler(const Value &val)
Definition: NVPTXUtilities.cpp:155
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::ComputeValueVTs
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:124
NVPTXUtilities.h
MachineValueType.h
llvm::MCInst::setOpcode
void setOpcode(unsigned Op)
Definition: MCInst.h:196
cl_common_defines.h
NVPTXTargetInfo.h
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::end
iterator end()
Definition: DenseSet.h:174
Instruction.h
llvm::MachineBasicBlock::getSymbol
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
Definition: MachineBasicBlock.cpp:59
CommandLine.h
llvm::NVPTXFloatMCExpr::createConstantFPSingle
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:48
llvm::NVPTXAsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
Definition: NVPTXAsmPrinter.cpp:481
TargetLowering.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::Type::isSingleValueType
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:257
bb
< i1 > br i1 label label bb bb
Definition: README.txt:978
llvm::getReqNTIDy
bool getReqNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:258
printMCExpr
static void printMCExpr(const MCExpr *E, raw_ostream &OS)
Definition: SystemZAsmParser.cpp:691
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:565
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:204
llvm::MCContext::getOrCreateSymbol
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:157
GlobalValue.h
MachineLoopInfo.h
llvm::ConstantDataSequential
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:568
TargetMachine.h
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:228
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:957
llvm::APInt::lshrInPlace
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:994
llvm::GlobalValue::hasAppendingLinkage
bool hasAppendingLinkage() const
Definition: GlobalValue.h:442
Constants.h
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:86
SmallString.h
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::MachineOperand::MO_GlobalAddress
@ MO_GlobalAddress
Address of a global value.
Definition: MachineOperand.h:62
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:534
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:653
ConvertFloatToBytes
static void ConvertFloatToBytes(unsigned char *p, float val)
Definition: NVPTXAsmPrinter.cpp:1760
llvm::User
Definition: User.h:44
llvm::MCExpr::Target
@ Target
Target specific expression.
Definition: MCExpr.h:42
llvm::ADDRESS_SPACE_GLOBAL
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
llvm::AsmPrinter::OutStreamer
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:97
llvm::Type::getDoubleTy
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:192
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
Twine.h
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3141
llvm::DICompileUnit::LineTablesOnly
@ LineTablesOnly
Definition: DebugInfoMetadata.h:1321
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
MCSymbol.h
llvm::GlobalValue::hasPrivateLinkage
bool hasPrivateLinkage() const
Definition: GlobalValue.h:444
llvm::NVPTXSubtarget::getPTXVersion
unsigned getPTXVersion() const
Definition: NVPTXSubtarget.h:83
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
MCInst.h
DenseSet.h
llvm::MCBinaryExpr::getRHS
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:627
llvm::NVPTXII::IsTexModeUnifiedFlag
@ IsTexModeUnifiedFlag
Definition: NVPTXBaseInfo.h:40
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:196
llvm::getSamplerName
std::string getSamplerName(const Value &val)
Definition: NVPTXUtilities.cpp:237
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::ConstantArray
ConstantArray - Constant Array Declarations.
Definition: Constants.h:407
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::size
size_type size() const
Definition: DenseSet.h:81
llvm::NVPTXTargetMachine::getManagedStrPool
ManagedStringPool * getManagedStrPool() const
Definition: NVPTXTargetMachine.h:49
llvm::getReqNTIDz
bool getReqNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:262
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:139
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:154
llvm::NVPTXII::IsSuldShift
@ IsSuldShift
Definition: NVPTXBaseInfo.h:37
llvm::NVPTXSubtarget::getRegisterInfo
const NVPTXRegisterInfo * getRegisterInfo() const override
Definition: NVPTXSubtarget.h:61
llvm::ADDRESS_SPACE_CONST
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
llvm::AsmPrinter::CurrentFnSym
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:119
llvm::Module::global_iterator
GlobalListType::iterator global_iterator
The Global Variable iterator.
Definition: Module.h:87
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:255
APFloat.h
This file declares a class to represent arbitrary precision floating point values and provide a varie...
llvm::MCSymbolRefExpr::getSymbol
const MCSymbol & getSymbol() const
Definition: MCExpr.h:395
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:183
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:563
DebugLoc.h
llvm::GlobalValue::hasInternalLinkage
bool hasInternalLinkage() const
Definition: GlobalValue.h:443
llvm::AttributeList::hasParamAttribute
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
Definition: Attributes.cpp:1559
llvm::Type::PointerTyID
@ PointerTyID
Pointers.
Definition: Type.h:74
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
__CLK_FILTER_MASK
@ __CLK_FILTER_MASK
Definition: cl_common_defines.h:97
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1132
llvm::MCExpr::getKind
ExprKind getKind() const
Definition: MCExpr.h:81
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::DICompileUnit::DebugDirectivesOnly
@ DebugDirectivesOnly
Definition: DebugInfoMetadata.h:1322
c
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int c
Definition: README.txt:418
llvm::NVPTXII::IsSustFlag
@ IsSustFlag
Definition: NVPTXBaseInfo.h:38
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MCSymbol::print
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:59
llvm::MCUnaryExpr::Minus
@ Minus
Unary minus.
Definition: MCExpr.h:425
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:357
llvm::AsmPrinter::emitBasicBlockStart
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
Definition: AsmPrinter.cpp:3158
llvm::MachineLoopInfo::isLoopHeader
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
Definition: MachineLoopInfo.h:139
Type.h
llvm::MCExpr::Binary
@ Binary
Binary expressions.
Definition: MCExpr.h:38
llvm::DICompileUnit::FullDebug
@ FullDebug
Definition: DebugInfoMetadata.h:1320
NVPTXMachineFunctionInfo.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::getNVPTXRegClassStr
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Definition: NVPTXRegisterInfo.cpp:72
llvm::NVPTX::NVCL
@ NVCL
Definition: NVPTX.h:71
llvm::MCInst::addOperand
void addOperand(const MCOperand Op)
Definition: MCInst.h:209
llvm::SmallString< 128 >
llvm::ManagedStringPool::getManagedString
std::string * getManagedString(const char *S)
Definition: ManagedStringPool.h:39
llvm::getTheNVPTXTarget64
Target & getTheNVPTXTarget64()
Definition: NVPTXTargetInfo.cpp:17
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::MCConstantExpr
Definition: MCExpr.h:144
llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:31
llvm::NVPTXFloatMCExpr::createConstantFPHalf
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:43
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::isKernelFunction
bool isKernelFunction(const Function &F)
Definition: NVPTXUtilities.cpp:274
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:555
llvm::NVPTXRegisterInfo::getName
const char * getName(unsigned RegNo) const
Definition: NVPTXRegisterInfo.h:51
BasicBlock.h
llvm::GlobalObject::hasSection
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:106
val
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 the input will be treated as an leaving the upper bits uninitialised For i64 store i32 val
Definition: README.txt:15
llvm::APFloat
Definition: APFloat.h:701
llvm::GlobalValue::hasAvailableExternallyLinkage
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:432
LLVMInitializeNVPTXAsmPrinter
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
Definition: NVPTXAsmPrinter.cpp:2249
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:128
llvm::GlobalValue
Definition: GlobalValue.h:44
NVPTXAsmPrinter.h
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:136
VI
@ VI
Definition: SIInstrInfo.cpp:7343
llvm::NVPTXSubtarget::hasImageHandles
bool hasImageHandles() const
Definition: NVPTXSubtarget.cpp:55
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MachineBasicBlock::pred_end
pred_iterator pred_end()
Definition: MachineBasicBlock.h:318
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:333
llvm::TargetRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
Definition: TargetRegisterInfo.h:723
DEPOTNAME
#define DEPOTNAME
Definition: NVPTXAsmPrinter.cpp:94
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:111
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::Type::isIntOrPtrTy
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:217
s
multiplies can be turned into SHL s
Definition: README.txt:370
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:58
llvm::NVPTXAsmPrinter::doInitialization
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: NVPTXAsmPrinter.cpp:779
llvm::DenseMap< unsigned, unsigned >
DebugInfo.h
NVPTXBaseInfo.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
Analysis.h
StringExtras.h
getOpenCLAlignment
static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty)
Definition: NVPTXAsmPrinter.cpp:1365
isEmptyXXStructor
static bool isEmptyXXStructor(GlobalVariable *GV)
Definition: NVPTXAsmPrinter.cpp:758
llvm::DICompileUnit
Compile unit.
Definition: DebugInfoMetadata.h:1313
llvm::ADDRESS_SPACE_SHARED
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
llvm::AttributeList::getParamAlignment
MaybeAlign getParamAlignment(unsigned ArgNo) const
Return the alignment for the specified function parameter.
Definition: Attributes.cpp:1582
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:634
useFuncSeen
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
Definition: NVPTXAsmPrinter.cpp:681
llvm::MachineOperand::getType
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
Definition: MachineOperand.h:216
printMemOperand
static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, const MachineFunction *MF, const Module *M, const MachineFrameInfo *MFI, const TargetInstrInfo *TII, LLVMContext &Ctx)
Definition: SelectionDAGDumper.cpp:511
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
llvm::MachineOperand::getFPImm
const ConstantFP * getFPImm() const
Definition: MachineOperand.h:544
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::begin
iterator begin()
Definition: DenseSet.h:173
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AsmPrinter::getSymbol
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:472
llvm::NVPTXII::IsSuldMask
@ IsSuldMask
Definition: NVPTXBaseInfo.h:36
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:815
llvm::NVPTXMachineFunctionInfo::getImageHandleSymbol
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
Definition: NVPTXMachineFunctionInfo.h:43
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:571
__CLK_FILTER_BASE
@ __CLK_FILTER_BASE
Definition: cl_common_defines.h:93
llvm::AsmPrinter::MF
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:100
llvm::AsmPrinter::OutContext
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:92
MachineModuleInfo.h
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::MachineOperand::MO_FPImmediate
@ MO_FPImmediate
Floating-point immediate operand.
Definition: MachineOperand.h:55
llvm::GlobalObject::getAlignment
unsigned getAlignment() const
FIXME: Remove this function once transition to Align is over.
Definition: GlobalObject.h:73
__CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_BASE
Definition: cl_common_defines.h:85
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:30
VisitGlobalVariableForEmission
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
Definition: NVPTXAsmPrinter.cpp:116
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:357
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
llvm::MCSymbolRefExpr
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::MachineFunction
Definition: MachineFunction.h:227
Triple.h
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1486
llvm::AsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:331
llvm::MCBinaryExpr::createAdd
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:521
llvm::NVPTXAsmPrinter::getVirtualRegisterName
std::string getVirtualRegisterName(unsigned) const
Definition: NVPTXAsmPrinter.cpp:578
llvm::DICompileUnit::NoDebug
@ NoDebug
Definition: DebugInfoMetadata.h:1319
llvm::AsmPrinter::GetExternalSymbolSymbol
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
Definition: AsmPrinter.cpp:3086
llvm::MCUnaryExpr::getSubExpr
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:467
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:549
DataLayout.h
llvm::SymbolTableList< GlobalVariable >
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:212
llvm::APFloatBase::IEEEdouble
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:166
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
GetSymbolRef
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, HexagonAsmPrinter &Printer, bool MustExtend)
Definition: HexagonMCInstLower.cpp:41
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::MCOperand::createReg
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:133
TargetLoweringObjectFile.h
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:937
llvm::MachineOperand::MO_MachineBasicBlock
@ MO_MachineBasicBlock
MachineBasicBlock reference.
Definition: MachineOperand.h:56
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ConstantExpr::getIntegerCast
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:2045
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:298
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
int64
to esp esp setne al movzbw ax esp setg cl movzbw cx cmove cx cl jne LBB1_2 esp which is much esp edx eax decl edx jle L7 esp ret eax ja L5 call abort Tail call optimization int64
Definition: README.txt:680
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::MachineFrameInfo::getMaxAlign
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Definition: MachineFrameInfo.h:585
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:636
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::AsmPrinter::MMI
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:103
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:140
llvm::Type::IntegerTyID
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:72
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Attributes.h
canDemoteGlobalVar
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
Definition: NVPTXAsmPrinter.cpp:663
llvm::NVPTXTargetStreamer
Implements NVPTX-specific streamer.
Definition: NVPTXTargetStreamer.h:18
name
static const char * name
Definition: SVEIntrinsicOpts.cpp:93
j
return j(j<< 16)
Constant.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
llvm::MCBinaryExpr::getOpcode
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:621
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:205
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:931
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:521
llvm::getMaxNTIDz
bool getMaxNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:250
llvm::NVPTX::CUDA
@ CUDA
Definition: NVPTX.h:72
uint16_t
llvm::NVPTXII::IsSurfTexQueryFlag
@ IsSurfTexQueryFlag
Definition: NVPTXBaseInfo.h:39
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:551
llvm::Type::getHalfTy
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:189
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::Type::FixedVectorTyID
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:77
llvm::MachineOperand::MO_ExternalSymbol
@ MO_ExternalSymbol
Name of external global symbol.
Definition: MachineOperand.h:61
GlobalVariable.h
llvm::isSurface
bool isSurface(const Value &val)
Definition: NVPTXUtilities.cpp:144
llvm::MCUnaryExpr::LNot
@ LNot
Logical negation.
Definition: MCExpr.h:424
Casting.h
llvm::NVPTXTargetMachine::getSubtargetImpl
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition: NVPTXTargetMachine.h:42
NVPTXTargetMachine.h
Function.h
__CLK_ADDRESS_MASK
@ __CLK_ADDRESS_MASK
Definition: cl_common_defines.h:79
llvm::DenseMapBase::size
unsigned size() const
Definition: DenseMap.h:100
llvm::APInt::getLoBits
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:567
ConvertDoubleToBytes
static void ConvertDoubleToBytes(unsigned char *p, double val)
Definition: NVPTXAsmPrinter.cpp:1767
llvm::LLVMTargetMachine
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Definition: TargetMachine.h:386
llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:382
llvm::TargetRegisterInfo::getNumRegClasses
unsigned getNumRegClasses() const
Definition: TargetRegisterInfo.h:717
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:184
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:320
llvm::getAlign
bool getAlign(const Function &F, unsigned index, unsigned &align)
Definition: NVPTXUtilities.cpp:284
llvm::getSurfaceName
std::string getSurfaceName(const Value &val)
Definition: NVPTXUtilities.cpp:232
llvm::ARMBuildAttrs::Symbol
@ Symbol
Definition: ARMBuildAttributes.h:79
llvm::getMaxNTIDy
bool getMaxNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:246
llvm::isImageWriteOnly
bool isImageWriteOnly(const Value &val)
Definition: NVPTXUtilities.cpp:188
llvm::Type::StructTyID
@ StructTyID
Structures.
Definition: Type.h:75
llvm::LegalityPredicates::isVector
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Definition: LegalityPredicates.cpp:73
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
align
mov r0 ldr L5 sub r0 lr needed for prologue ldmia ip add bx lr r2 The last stmia stores r2 into the address passed in there is one additional stmia that stores and r2 to some stack location The store is dead The llvm gcc generated code looks like align
Definition: README.txt:236
llvm::clearAnnotationCache
void clearAnnotationCache(const Module *Mod)
Definition: NVPTXUtilities.cpp:41
llvm::MCExpr::print
void print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens=false) const
Definition: MCExpr.cpp:42
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4818
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::AsmPrinter::TM
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:85
SmallVector.h
llvm::NVPTXSubtarget::getTargetName
std::string getTargetName() const
Definition: NVPTXSubtarget.h:81
User.h
llvm::MCExpr::SymbolRef
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
__CLK_NORMALIZED_MASK
@ __CLK_NORMALIZED_MASK
Definition: cl_common_defines.h:88
llvm::MachineOperand::getSymbolName
const char * getSymbolName() const
Definition: MachineOperand.h:605
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:161
llvm::MCExpr::Unary
@ Unary
Unary expressions.
Definition: MCExpr.h:41
MCStreamer.h
UnrollLoop.h
llvm::Module::const_iterator
FunctionListType::const_iterator const_iterator
The Function constant iterator.
Definition: Module.h:94
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::MCExpr::Constant
@ Constant
Constant expressions.
Definition: MCExpr.h:39
llvm::raw_svector_ostream
A raw_ostream that writes to a SmallVector or SmallString.
Definition: raw_ostream.h:647
NVPTXSubtarget.h
llvm::NVPTXMachineFunctionInfo
Definition: NVPTXMachineFunctionInfo.h:20
llvm::AsmPrinter::getDataLayout
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:226
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::erase
bool erase(const ValueT &V)
Definition: DenseSet.h:101
llvm::StringRef::data
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:152
llvm::GlobalValue::hasLinkOnceLinkage
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:435
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::ConstantFoldConstant
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
Definition: ConstantFolding.cpp:1233
MachineOperand.h
llvm::Type::getInt16Ty
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:203
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:271
DerivedTypes.h
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:273
llvm::MCOperand
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:35
llvm::NVPTXAsmPrinter::getFunctionFrameSymbol
const MCSymbol * getFunctionFrameSymbol() const override
Return the symbol for the function pseudo stack if the stack frame is not register-based.
Definition: NVPTXAsmPrinter.cpp:503
llvm::AsmPrinter::emitInitialRawDwarfLocDirective
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:245
llvm::MCSymbolRefExpr::VK_None
@ VK_None
Definition: MCExpr.h:195
llvm::Type::isAggregateType
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:265
LLVMContext.h
llvm::NVPTXTargetMachine::getDrvInterface
NVPTX::DrvInterface getDrvInterface() const
Definition: NVPTXTargetMachine.h:48
llvm::RegisterAsmPrinter
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...
Definition: TargetRegistry.h:1181
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::getMaxNTIDx
bool getMaxNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:242
llvm::getTextureName
std::string getTextureName(const Value &val)
Definition: NVPTXUtilities.cpp:227
raw_ostream.h
llvm::getMaxNReg
bool getMaxNReg(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:270
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
MachineFunction.h
llvm::getNVPTXRegClassName
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
Definition: NVPTXRegisterInfo.cpp:28
TargetRegistry.h
MCExpr.h
llvm::isManaged
bool isManaged(const Value &val)
Definition: NVPTXUtilities.cpp:216
llvm::NVPTXII::IsTexFlag
@ IsTexFlag
Definition: NVPTXBaseInfo.h:35
CU
Definition: AArch64AsmBackend.cpp:515
llvm::AsmPrinter::getFunctionNumber
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:218
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
TargetRegisterInfo.h
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
NVPTXMCExpr.h
llvm::Type::HalfTyID
@ HalfTyID
16-bit floating point type
Definition: Type.h:57
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:434
llvm::AsmPrinter::PrintAsmOperand
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AsmPrinterInlineAsm.cpp:600
llvm::Type::getFloatTy
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:191
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:342
llvm::MCUnaryExpr::Not
@ Not
Bitwise negation.
Definition: MCExpr.h:426
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:129
llvm::MCBinaryExpr::getLHS
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:624
llvm::AsmPrinter::doInitialization
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:265
llvm::getTheNVPTXTarget32
Target & getTheNVPTXTarget32()
Definition: NVPTXTargetInfo.cpp:13