LLVM 19.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/DenseSet.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/Twine.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/BasicBlock.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/Constants.h"
54#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/DebugLoc.h"
59#include "llvm/IR/Function.h"
60#include "llvm/IR/GlobalAlias.h"
61#include "llvm/IR/GlobalValue.h"
63#include "llvm/IR/Instruction.h"
64#include "llvm/IR/LLVMContext.h"
65#include "llvm/IR/Module.h"
66#include "llvm/IR/Operator.h"
67#include "llvm/IR/Type.h"
68#include "llvm/IR/User.h"
69#include "llvm/MC/MCExpr.h"
70#include "llvm/MC/MCInst.h"
71#include "llvm/MC/MCInstrDesc.h"
72#include "llvm/MC/MCStreamer.h"
73#include "llvm/MC/MCSymbol.h"
78#include "llvm/Support/Endian.h"
81#include "llvm/Support/Path.h"
87#include <cassert>
88#include <cstdint>
89#include <cstring>
90#include <new>
91#include <string>
92#include <utility>
93#include <vector>
94
95using namespace llvm;
96
// Hidden boolean command-line option "nvptx-lower-global-ctor-dtor"
// (default: false). doInitialization() in this file reads it before rejecting
// modules that carry non-trivial llvm.global_ctors / llvm.global_dtors.
97static cl::opt<bool>
98 LowerCtorDtor("nvptx-lower-global-ctor-dtor",
99 cl::desc("Lower GPU ctor / dtors to globals on the device."),
100 cl::init(false), cl::Hidden);
101
// String macro; its uses are not visible in this part of the listing.
102#define DEPOTNAME "__local_depot"
103
104/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
105/// depends.
///
/// A GlobalVariable is inserted into Globals as-is and the search stops there;
/// for any other User the operands are searched recursively. Values that are
/// neither are ignored.
// NOTE(review): listing lines 107-108 (the function name and parameter list)
// are absent from this extract; the body uses parameters named V and Globals.
106static void
109 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
110 Globals.insert(GV);
111 else {
112 if (const User *U = dyn_cast<User>(V)) {
113 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
114 DiscoverDependentGlobals(U->getOperand(i), Globals);
115 }
116 }
117 }
118}
119
120/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
121/// instances to be emitted, but only after any dependents have been added
122/// first.
///
/// Reports a fatal error when GV is reached again while it is still being
/// processed (a circular dependency between globals).
// NOTE(review): listing lines 124-127 (function name and parameter list) and
// 137 (the declaration of Others) are absent from this extract.
123static void
128 // Have we already visited this one?
129 if (Visited.count(GV))
130 return;
131
132 // Do we have a circular dependency?
// Visiting holds the globals whose visit has started but not finished; a
// failed insert means GV is already in that set.
133 if (!Visiting.insert(GV).second)
134 report_fatal_error("Circular dependency found in global variable set");
135
136 // Make sure we visit all dependents first
138 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
139 DiscoverDependentGlobals(GV->getOperand(i), Others);
140
// Note: the loop variable below shadows the GV parameter inside the loop.
141 for (const GlobalVariable *GV : Others)
142 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
143
144 // Now we can visit ourself
145 Order.push_back(GV);
146 Visited.insert(GV);
147 Visiting.erase(GV);
148}
149
// Verifies MI's opcode against the subtarget feature bits, then lowers MI
// into an MCInst via lowerToMCInst().
// NOTE(review): listing line 156 is absent from this extract, so the statement
// that consumes Inst after lowering is not visible here.
150void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
151 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
152 getSubtargetInfo().getFeatureBits());
153
154 MCInst Inst;
155 lowerToMCInst(MI, Inst);
157}
158
159// Handle symbol backtracking for targets that do not support image handles
160bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
161 unsigned OpNo, MCOperand &MCOp) {
162 const MachineOperand &MO = MI->getOperand(OpNo);
163 const MCInstrDesc &MCID = MI->getDesc();
164
165 if (MCID.TSFlags & NVPTXII::IsTexFlag) {
166 // This is a texture fetch, so operand 4 is a texref and operand 5 is
167 // a samplerref
168 if (OpNo == 4 && MO.isImm()) {
169 lowerImageHandleSymbol(MO.getImm(), MCOp);
170 return true;
171 }
172 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
173 lowerImageHandleSymbol(MO.getImm(), MCOp);
174 return true;
175 }
176
177 return false;
178 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
179 unsigned VecSize =
180 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
181
182 // For a surface load of vector size N, the Nth operand will be the surfref
183 if (OpNo == VecSize && MO.isImm()) {
184 lowerImageHandleSymbol(MO.getImm(), MCOp);
185 return true;
186 }
187
188 return false;
189 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
190 // This is a surface store, so operand 0 is a surfref
191 if (OpNo == 0 && MO.isImm()) {
192 lowerImageHandleSymbol(MO.getImm(), MCOp);
193 return true;
194 }
195
196 return false;
197 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
198 // This is a query, so operand 1 is a surfref/texref
199 if (OpNo == 1 && MO.isImm()) {
200 lowerImageHandleSymbol(MO.getImm(), MCOp);
201 return true;
202 }
203
204 return false;
205 }
206
207 return false;
208}
209
// Looks up the image-handle symbol name for Index, saves that name in the
// target machine's string pool, and stores a reference to the corresponding
// MCSymbol in MCOp.
// NOTE(review): listing lines 212 and 214 are absent from this extract; MFI is
// presumably declared on one of them.
210void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
211 // Ewwww
213 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
215 const char *Sym = MFI->getImageHandleSymbol(Index);
216 StringRef SymName = nvTM.getStrPool().save(Sym);
217 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
218}
219
// Builds OutMI from MI: copies the opcode, then lowers the operands in order.
// On subtargets without image handles, each operand is first offered to
// lowerImageHandleOperand(); operands it does not claim go through
// lowerOperand().
// NOTE(review): listing line 226 (the argument of GetSymbolRef in the
// CALL_PROTOTYPE path) is absent from this extract.
220void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
221 OutMI.setOpcode(MI->getOpcode());
222 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
223 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
224 const MachineOperand &MO = MI->getOperand(0);
225 OutMI.addOperand(GetSymbolRef(
227 return;
228 }
229
230 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
231 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
232 const MachineOperand &MO = MI->getOperand(i);
233
234 MCOperand MCOp;
235 if (!STI.hasImageHandles()) {
// An operand recognised as an image handle is already fully lowered here.
236 if (lowerImageHandleOperand(MI, i, MCOp)) {
237 OutMI.addOperand(MCOp);
238 continue;
239 }
240 }
241
242 if (lowerOperand(MO, MCOp))
243 OutMI.addOperand(MCOp);
244 }
245}
246
// Translates machine operand MO into MCOp, dispatching on MO.getType().
// The only return statement visible here returns true; an unknown operand type
// hits llvm_unreachable and an unsupported FP type is a fatal error.
// NOTE(review): the case labels (listing lines 251, 254, 257-258, 261, 264,
// 267) and the bodies of the per-FP-type cases (274-275, 278-279, 282-283,
// 286-287) are absent from this extract.
247bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
248 MCOperand &MCOp) {
249 switch (MO.getType()) {
250 default: llvm_unreachable("unknown operand type");
// Register operand: encoded through encodeVirtualRegister().
252 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
253 break;
// Immediate operand: passed through unchanged.
255 MCOp = MCOperand::createImm(MO.getImm());
256 break;
// Basic-block operand: refers to the block's symbol.
259 MO.getMBB()->getSymbol(), OutContext));
260 break;
// External-symbol operand.
262 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
263 break;
// Global-address operand.
265 MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
266 break;
// Floating-point immediate: handled per IR type (half, bfloat, float, double).
268 const ConstantFP *Cnt = MO.getFPImm();
269 const APFloat &Val = Cnt->getValueAPF();
270
271 switch (Cnt->getType()->getTypeID()) {
272 default: report_fatal_error("Unsupported FP type"); break;
273 case Type::HalfTyID:
276 break;
277 case Type::BFloatTyID:
280 break;
281 case Type::FloatTyID:
284 break;
285 case Type::DoubleTyID:
288 break;
289 }
290 break;
291 }
292 }
293 return true;
294}
295
// Packs a register into a single unsigned value: a register-class tag in the
// upper 4 bits (bits 28-31) and the mapped register number in the low 28 bits.
// In the else branch (physical registers, per the comment there) the tag is 0
// and the register ID itself is used.
// Reports a fatal error for a register class outside the six handled below.
// NOTE(review): listing line 297 is absent from this extract; it presumably
// opens the `if` whose `else` appears at listing line 325.
296unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
298 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
299
// operator[] on both maps inserts a default entry when the key is absent.
300 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
301 unsigned RegNum = RegMap[Reg];
302
303 // Encode the register class in the upper 4 bits
304 // Must be kept in sync with NVPTXInstPrinter::printRegName
305 unsigned Ret = 0;
306 if (RC == &NVPTX::Int1RegsRegClass) {
307 Ret = (1 << 28);
308 } else if (RC == &NVPTX::Int16RegsRegClass) {
309 Ret = (2 << 28);
310 } else if (RC == &NVPTX::Int32RegsRegClass) {
311 Ret = (3 << 28);
312 } else if (RC == &NVPTX::Int64RegsRegClass) {
313 Ret = (4 << 28);
314 } else if (RC == &NVPTX::Float32RegsRegClass) {
315 Ret = (5 << 28);
316 } else if (RC == &NVPTX::Float64RegsRegClass) {
317 Ret = (6 << 28);
318 } else {
319 report_fatal_error("Bad register class");
320 }
321
322 // Insert the vreg number
323 Ret |= (RegNum & 0x0FFFFFFF);
324 return Ret;
325 } else {
326 // Some special-use registers are actually physical registers.
327 // Encode this as the register class ID of 0 and the real register ID.
328 return Reg & 0x0FFFFFFF;
329 }
330}
331
// Wraps an expression that refers to Symbol in an expression MCOperand.
// NOTE(review): listing line 334 (the start of the statement that assigns
// Expr) is absent from this extract.
332MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
333 const MCExpr *Expr;
335 OutContext);
336 return MCOperand::createExpr(Expr);
337}
338
339static bool ShouldPassAsArray(Type *Ty) {
340 return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) ||
341 Ty->isHalfTy() || Ty->isBFloatTy();
342}
343
// Writes the return-value declaration of F to O, wrapped in " (" ... ") ".
// Nothing is written for a void return type.
//
// With the ABI form (SM version >= 20) the value is declared as a single
// .param named func_retval0: .b<N> for scalars and pointers, or an aligned
// .b8 array for types accepted by ShouldPassAsArray(). Otherwise one .reg
// declaration is written per scalar element, named func_retval<idx>.
// NOTE(review): listing lines 346 (presumably the declaration of STI), 365,
// 367, 375 and 395 are absent from this extract, so the size computation for
// floating-point types and any adjustment of `size`/`sz` are not visible here.
344void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
345 const DataLayout &DL = getDataLayout();
347 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
348
349 Type *Ty = F->getReturnType();
350
351 bool isABI = (STI.getSmVersion() >= 20);
352
353 if (Ty->getTypeID() == Type::VoidTyID)
354 return;
355 O << " (";
356
357 if (isABI) {
// Scalar integer / floating-point types that are not passed as an array.
358 if ((Ty->isFloatingPointTy() || Ty->isIntegerTy()) &&
359 !ShouldPassAsArray(Ty)) {
360 unsigned size = 0;
361 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
362 size = ITy->getBitWidth();
363 } else {
364 assert(Ty->isFloatingPointTy() && "Floating point type expected here");
366 }
368 O << ".param .b" << size << " func_retval0";
369 } else if (isa<PointerType>(Ty)) {
// Pointers use the target's pointer width.
370 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
371 << " func_retval0";
372 } else if (ShouldPassAsArray(Ty)) {
// Array-like types: a byte array of the type's alloc size.
373 unsigned totalsz = DL.getTypeAllocSize(Ty);
374 Align RetAlignment = TLI->getFunctionArgumentAlignment(
376 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
377 << totalsz << "]";
378 } else
379 llvm_unreachable("Unknown return type");
380 } else {
// Non-ABI form: split the type into value types and emit one register per
// scalar element, numbering them consecutively with idx.
381 SmallVector<EVT, 16> vtparts;
382 ComputeValueVTs(*TLI, DL, Ty, vtparts);
383 unsigned idx = 0;
384 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
385 unsigned elems = 1;
386 EVT elemtype = vtparts[i];
387 if (vtparts[i].isVector()) {
388 elems = vtparts[i].getVectorNumElements();
389 elemtype = vtparts[i].getVectorElementType();
390 }
391
392 for (unsigned j = 0, je = elems; j != je; ++j) {
393 unsigned sz = elemtype.getSizeInBits();
394 if (elemtype.isInteger())
396 O << ".reg .b" << sz << " func_retval" << idx;
397 if (j < je - 1)
398 O << ", ";
399 ++idx;
400 }
401 if (i < e - 1)
402 O << ", ";
403 }
404 }
405 O << ") ";
406}
407
408void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
409 raw_ostream &O) {
410 const Function &F = MF.getFunction();
411 printReturnValStr(&F, O);
412}
413
414// Return true if MBB is the header of a loop marked with
415// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
416bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
417 const MachineBasicBlock &MBB) const {
418 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
419 // We insert .pragma "nounroll" only to the loop header.
420 if (!LI.isLoopHeader(&MBB))
421 return false;
422
423 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
424 // we iterate through each back edge of the loop with header MBB, and check
425 // whether its metadata contains llvm.loop.unroll.disable.
426 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
427 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
428 // Edges from other loops to MBB are not back edges.
429 continue;
430 }
431 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
432 if (MDNode *LoopID =
433 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
434 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
435 return true;
436 if (MDNode *UnrollCountMD =
437 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
438 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
439 ->isOne())
440 return true;
441 }
442 }
443 }
444 }
445 return false;
446}
447
// Emits a `.pragma "nounroll";` line before MBB when MBB is the header of a
// loop that isLoopHeaderOfNoUnroll() reports as not-to-be-unrolled.
// NOTE(review): listing line 449 is absent from this extract.
448void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
450 if (isLoopHeaderOfNoUnroll(MBB))
451 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
452}
453
// Emits the textual header of the current function: linkage directive,
// ".entry" or ".func" (the latter followed by the return-value declaration),
// the parameter list and, for kernels, the kernel directives. It then opens
// the body with "{" and emits the function's virtual register declarations.
// Module-level globals are emitted first if no earlier function did so.
// NOTE(review): listing lines 455-456 (presumably the declarations of Str and
// O), 474, 482 (the condition guarding ".noreturn") and 495 are absent from
// this extract.
454void NVPTXAsmPrinter::emitFunctionEntryLabel() {
457
458 if (!GlobalsEmitted) {
459 emitGlobals(*MF->getFunction().getParent());
460 GlobalsEmitted = true;
461 }
462
463 // Set up
464 MRI = &MF->getRegInfo();
465 F = &MF->getFunction();
466 emitLinkageDirective(F, O);
467 if (isKernelFunction(*F))
468 O << ".entry ";
469 else {
470 O << ".func ";
471 printReturnValStr(*MF, O);
472 }
473
475
476 emitFunctionParamList(F, O);
477 O << "\n";
478
479 if (isKernelFunction(*F))
480 emitKernelFunctionDirectives(*F, O);
481
483 O << ".noreturn";
484
485 OutStreamer->emitRawText(O.str());
486
// Start from an empty register mapping for this function.
487 VRegMapping.clear();
488 // Emit open brace for function body.
489 OutStreamer->emitRawText(StringRef("{\n"));
490 setAndEmitFunctionVirtualRegisters(*MF);
491 // Emit initial .loc debug directive for correct relocation symbol data.
492 if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
493 assert(SP->getUnit());
494 if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo())
496 }
497}
498
// Runs the generic AsmPrinter on F, then emits the closing "}" of the function
// body and returns the generic printer's result.
// NOTE(review): listing line 499 (the function signature) is absent from this
// extract; the name is inferred from the AsmPrinter::runOnMachineFunction call.
500 bool Result = AsmPrinter::runOnMachineFunction(F);
501 // Emit closing brace for the body of function F.
502 // The closing brace must be emitted here because we need to emit additional
503 // debug labels/data after the last basic block.
504 // We need to emit the closing brace here because we don't have function that
505 // finished emission of the function body.
506 OutStreamer->emitRawText(StringRef("}\n"));
507 return Result;
508}
509
// Emits the text produced by emitDemotedVars() for the current function as raw
// text.
// NOTE(review): listing line 511 (presumably the declaration of Str) is absent
// from this extract.
510void NVPTXAsmPrinter::emitFunctionBodyStart() {
512 raw_svector_ostream O(Str);
513 emitDemotedVars(&MF->getFunction(), O);
514 OutStreamer->emitRawText(O.str());
515}
516
517void NVPTXAsmPrinter::emitFunctionBodyEnd() {
518 VRegMapping.clear();
519}
520
// Tail of a function that returns the MCSymbol named by Str, creating it in
// OutContext if it does not exist yet.
// NOTE(review): listing lines 521-523 (the signature and the construction of
// Str) are absent from this extract.
524 return OutContext.getOrCreateSymbol(Str);
525}
526
// Emits an "implicit-def: <reg>" comment for the register defined by MI's
// operand 0, followed by a blank line. Physical registers are named through
// the subtarget's register info.
// NOTE(review): listing line 531 (the name used for virtual registers) is
// absent from this extract.
527void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
528 Register RegNo = MI->getOperand(0).getReg();
529 if (RegNo.isVirtual()) {
530 OutStreamer->AddComment(Twine("implicit-def: ") +
532 } else {
533 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
534 OutStreamer->AddComment(Twine("implicit-def: ") +
535 STI.getRegisterInfo()->getName(RegNo));
536 }
537 OutStreamer->addBlankLine();
538}
539
540void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
541 raw_ostream &O) const {
542 // If the NVVM IR has some of reqntid* specified, then output
543 // the reqntid directive, and set the unspecified ones to 1.
544 // If none of Reqntid* is specified, don't output reqntid directive.
545 std::optional<unsigned> Reqntidx = getReqNTIDx(F);
546 std::optional<unsigned> Reqntidy = getReqNTIDy(F);
547 std::optional<unsigned> Reqntidz = getReqNTIDz(F);
548
549 if (Reqntidx || Reqntidy || Reqntidz)
550 O << ".reqntid " << Reqntidx.value_or(1) << ", " << Reqntidy.value_or(1)
551 << ", " << Reqntidz.value_or(1) << "\n";
552
553 // If the NVVM IR has some of maxntid* specified, then output
554 // the maxntid directive, and set the unspecified ones to 1.
555 // If none of maxntid* is specified, don't output maxntid directive.
556 std::optional<unsigned> Maxntidx = getMaxNTIDx(F);
557 std::optional<unsigned> Maxntidy = getMaxNTIDy(F);
558 std::optional<unsigned> Maxntidz = getMaxNTIDz(F);
559
560 if (Maxntidx || Maxntidy || Maxntidz)
561 O << ".maxntid " << Maxntidx.value_or(1) << ", " << Maxntidy.value_or(1)
562 << ", " << Maxntidz.value_or(1) << "\n";
563
564 unsigned Mincta = 0;
565 if (getMinCTASm(F, Mincta))
566 O << ".minnctapersm " << Mincta << "\n";
567
568 unsigned Maxnreg = 0;
569 if (getMaxNReg(F, Maxnreg))
570 O << ".maxnreg " << Maxnreg << "\n";
571
572 // .maxclusterrank directive requires SM_90 or higher, make sure that we
573 // filter it out for lower SM versions, as it causes a hard ptxas crash.
574 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
575 const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
576 unsigned Maxclusterrank = 0;
577 if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90)
578 O << ".maxclusterrank " << Maxclusterrank << "\n";
579}
580
581std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
582 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
583
584 std::string Name;
585 raw_string_ostream NameStr(Name);
586
587 VRegRCMap::const_iterator I = VRegMapping.find(RC);
588 assert(I != VRegMapping.end() && "Bad register class");
589 const DenseMap<unsigned, unsigned> &RegMap = I->second;
590
591 VRegMap::const_iterator VI = RegMap.find(Reg);
592 assert(VI != RegMap.end() && "Bad virtual register");
593 unsigned MappedVR = VI->second;
594
595 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
596
597 NameStr.flush();
598 return Name;
599}
600
601void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
602 raw_ostream &O) {
603 O << getVirtualRegisterName(vr);
604}
605
// Emits a declaration for alias GA using the aliasee function's signature but
// the alias's own symbol name.
// NOTE(review): listing lines 610 (presumably the start of an error call for
// an invalid aliasee) and 614 (the end of the linkage condition) are absent
// from this extract.
606void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
607 raw_ostream &O) {
608 const Function *F = dyn_cast_or_null<Function>(GA->getAliaseeObject());
609 if (!F || isKernelFunction(*F) || F->isDeclaration())
611 "NVPTX aliasee must be a non-kernel function definition");
612
613 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
615 report_fatal_error("NVPTX aliasee must not be '.weak'");
616
617 emitDeclarationWithName(F, getSymbol(GA), O);
618}
619
620void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
621 emitDeclarationWithName(F, getSymbol(F), O);
622}
623
// Writes a function declaration to O: linkage directive, ".entry" or ".func",
// the return-value declaration, the symbol S as the name, the parameter list,
// and a terminating ";". F supplies the signature; S supplies the name.
// NOTE(review): listing line 636 (the condition guarding ".noreturn") is
// absent from this extract.
624void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
625 raw_ostream &O) {
626 emitLinkageDirective(F, O);
627 if (isKernelFunction(*F))
628 O << ".entry ";
629 else
630 O << ".func ";
// Called for kernels as well as plain functions.
631 printReturnValStr(F, O);
632 S->print(O, MAI);
633 O << "\n";
634 emitFunctionParamList(F, O);
635 O << "\n";
637 O << ".noreturn";
638 O << ";\n";
639}
640
// Returns true when C is a GlobalVariable other than "llvm.used"; otherwise
// the constant users of C are examined. Returns false for a null C.
// NOTE(review): listing line 651 (the condition tested on each constant user,
// presumably a recursive call) is absent from this extract.
641static bool usedInGlobalVarDef(const Constant *C) {
642 if (!C)
643 return false;
644
645 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
646 return GV->getName() != "llvm.used";
647 }
648
// Note: the inner C below shadows the parameter C.
649 for (const User *U : C->users())
650 if (const Constant *C = dyn_cast<Constant>(U))
652 return true;
653
654 return false;
655}
656
657static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
658 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
659 if (othergv->getName() == "llvm.used")
660 return true;
661 }
662
663 if (const Instruction *instr = dyn_cast<Instruction>(U)) {
664 if (instr->getParent() && instr->getParent()->getParent()) {
665 const Function *curFunc = instr->getParent()->getParent();
666 if (oneFunc && (curFunc != oneFunc))
667 return false;
668 oneFunc = curFunc;
669 return true;
670 } else
671 return false;
672 }
673
674 for (const User *UU : U->users())
675 if (!usedInOneFunc(UU, oneFunc))
676 return false;
677
678 return true;
679}
680
681/* Find out if a global variable can be demoted to local scope.
682 * Currently, this is valid for CUDA shared variables, which have local
683 * scope and global lifetime. So the conditions to check are :
684 * 1. Is the global variable in shared address space?
685 * 2. Does it have local linkage?
686 * 3. Is the global variable referenced only in one function?
687 */
688static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
689 if (!gv->hasLocalLinkage())
690 return false;
691 PointerType *Pty = gv->getType();
692 if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
693 return false;
694
695 const Function *oneFunc = nullptr;
696
697 bool flag = usedInOneFunc(gv, oneFunc);
698 if (!flag)
699 return false;
700 if (!oneFunc)
701 return false;
702 f = oneFunc;
703 return true;
704}
705
// Returns true when some instruction that uses C, directly or through other
// constants, belongs to a function that is a key of seenMap. Instructions that
// are not attached to a basic block or function are skipped.
// NOTE(review): listing line 707 (the declaration of the seenMap parameter) is
// absent from this extract.
706static bool useFuncSeen(const Constant *C,
708 for (const User *U : C->users()) {
709 if (const Constant *cu = dyn_cast<Constant>(U)) {
// Constant users are followed recursively.
710 if (useFuncSeen(cu, seenMap))
711 return true;
712 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
713 const BasicBlock *bb = I->getParent();
714 if (!bb)
715 continue;
716 const Function *caller = bb->getParent();
717 if (!caller)
718 continue;
719 if (seenMap.contains(caller))
720 return true;
721 }
722 }
723 return false;
724}
725
// Emits forward declarations to O for the functions of M that need one:
//  - functions carrying the "nvptx-libcall-callee" attribute;
//  - used, non-intrinsic function declarations;
//  - function definitions used in a global variable definition, or used by a
//    function that appears earlier in the module.
// Afterwards a declaration is emitted for every alias in M.
// NOTE(review): listing line 727 (the declaration of seenMap) is absent from
// this extract.
726void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
728 for (const Function &F : M) {
729 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
730 emitDeclaration(&F, O);
731 continue;
732 }
733
734 if (F.isDeclaration()) {
// Unused declarations and intrinsics get no PTX declaration.
735 if (F.use_empty())
736 continue;
737 if (F.getIntrinsicID())
738 continue;
739 emitDeclaration(&F, O);
740 continue;
741 }
// F is a definition: one qualifying use is enough, hence the breaks below.
742 for (const User *U : F.users()) {
743 if (const Constant *C = dyn_cast<Constant>(U)) {
744 if (usedInGlobalVarDef(C)) {
745 // The use is in the initialization of a global variable
746 // that is a function pointer, so print a declaration
747 // for the original function
748 emitDeclaration(&F, O);
749 break;
750 }
751 // Emit a declaration of this function if the function that
752 // uses this constant expr has already been seen.
753 if (useFuncSeen(C, seenMap)) {
754 emitDeclaration(&F, O);
755 break;
756 }
757 }
758
759 if (!isa<Instruction>(U))
760 continue;
761 const Instruction *instr = cast<Instruction>(U);
762 const BasicBlock *bb = instr->getParent();
763 if (!bb)
764 continue;
765 const Function *caller = bb->getParent();
766 if (!caller)
767 continue;
768
769 // If a caller has already been seen, then the caller is
770 // appearing in the module before the callee. so print out
771 // a declaration for the callee.
772 if (seenMap.contains(caller)) {
773 emitDeclaration(&F, O);
774 break;
775 }
776 }
// Record F as seen only after its own users have been examined.
777 seenMap[&F] = true;
778 }
779 for (const GlobalAlias &GA : M.aliases())
780 emitAliasDeclaration(&GA, O);
781}
782
// Returns true when GV is null, when its initializer is not a ConstantArray,
// or when that array has no operands.
// NOTE(review): listing line 783 (the function signature) is absent from this
// extract; the name is taken from the call sites in doInitialization().
784 if (!GV) return true;
785 const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
786 if (!InitList) return true; // Not an array; we don't know how to parse.
787 return InitList->getNumOperands() == 0;
788}
789
790void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
791 // Construct a default subtarget off of the TargetMachine defaults. The
792 // rest of NVPTX isn't friendly to change subtargets per function and
793 // so the default TargetMachine will have all of the options.
794 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
795 const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
796 SmallString<128> Str1;
797 raw_svector_ostream OS1(Str1);
798
799 // Emit header before any dwarf directives are emitted below.
800 emitHeader(M, OS1, *STI);
801 OutStreamer->emitRawText(OS1.str());
802}
803
// Module-level setup: rejects modules that use aliases on a PTX version below
// 6.3 or an SM version below 30, checks for non-trivial global
// constructors/destructors, runs AsmPrinter::doInitialization() and resets
// GlobalsEmitted.
// NOTE(review): listing lines 804 (the function signature), 816 and 822 (the
// start of the calls that report the ctor/dtor errors) are absent from this
// extract; the name is inferred from the AsmPrinter::doInitialization call.
805 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
806 const NVPTXSubtarget &STI =
807 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
808 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
809 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
810
811 // OpenMP supports NVPTX global constructors and destructors.
812 bool IsOpenMP = M.getModuleFlag("openmp") != nullptr;
813
// Non-trivial ctors are tolerated only with -nvptx-lower-global-ctor-dtor or
// for OpenMP modules.
814 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
815 !LowerCtorDtor && !IsOpenMP) {
817 "Module has a nontrivial global ctor, which NVPTX does not support.");
818 return true; // error
819 }
// Same rule for dtors.
820 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
821 !LowerCtorDtor && !IsOpenMP) {
823 "Module has a nontrivial global dtor, which NVPTX does not support.");
824 return true; // error
825 }
826
827 // We need to call the parent's one explicitly.
828 bool Result = AsmPrinter::doInitialization(M);
829
830 GlobalsEmitted = false;
831
832 return Result;
833}
834
// Emits, as one raw-text chunk, the function declarations of M followed by its
// module-level global variables in dependency order.
// NOTE(review): listing lines 846-848 (presumably the declarations of Globals,
// GVVisited and GVVisiting) are absent from this extract.
835void NVPTXAsmPrinter::emitGlobals(const Module &M) {
836 SmallString<128> Str2;
837 raw_svector_ostream OS2(Str2);
838
839 emitDeclarations(M, OS2);
840
841 // As ptxas does not support forward references of globals, we need to first
842 // sort the list of module-level globals in def-use order. We visit each
843 // global variable in order, and ensure that we emit it *after* its dependent
844 // globals. We use a little extra memory maintaining both a set and a list to
845 // have fast searches while maintaining a strict ordering.
849
850 // Visit each global variable, in order
851 for (const GlobalVariable &I : M.globals())
852 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
853
// Every global must have been visited and none left half-processed.
854 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
855 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
856
857 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
858 const NVPTXSubtarget &STI =
859 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
860
861 // Print out module-level global variables in proper order
862 for (unsigned i = 0, e = Globals.size(); i != e; ++i)
863 printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
864
865 OS2 << '\n';
866
867 OutStreamer->emitRawText(OS2.str());
868}
869
// Emits a ".alias <alias-symbol>, <aliasee-name>;" line for GA as raw text.
// The aliasee is named by its IR name, the alias by its MCSymbol name.
// NOTE(review): listing lines 871-872 (presumably the declaration of the OS
// stream and its buffer) are absent from this extract.
870void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
873
874 MCSymbol *Name = getSymbol(&GA);
875
876 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
877 << ";\n";
878
879 OutStreamer->emitRawText(OS.str());
880}
881
// Writes the PTX module header to O: a banner comment, the ".version" line
// (the PTX version is stored as major*10+minor), the ".target" line with
// optional ", texmode_independent" and ", debug" suffixes, and the
// ".address_size" line (64 or 32).
// NOTE(review): listing lines 902-903 and 905-906 (the case labels of the
// emission-kind switch) are absent from this extract.
882void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
883 const NVPTXSubtarget &STI) {
884 O << "//\n";
885 O << "// Generated by LLVM NVPTX Back-End\n";
886 O << "//\n";
887 O << "\n";
888
889 unsigned PTXVersion = STI.getPTXVersion();
890 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
891
892 O << ".target ";
893 O << STI.getTargetName();
894
895 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
896 if (NTM.getDrvInterface() == NVPTX::NVCL)
897 O << ", texmode_independent";
898
// Scan the compile units; stop at the first one that sets HasFullDebugInfo.
899 bool HasFullDebugInfo = false;
900 for (DICompileUnit *CU : M.debug_compile_units()) {
901 switch(CU->getEmissionKind()) {
904 break;
907 HasFullDebugInfo = true;
908 break;
909 }
910 if (HasFullDebugInfo)
911 break;
912 }
913 if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
914 O << ", debug";
915
916 O << "\n";
917
918 O << ".address_size ";
919 if (NTM.is64Bit())
920 O << "64";
921 else
922 O << "32";
923 O << "\n";
924
925 O << "\n";
926}
927
// Module-level teardown: emits the globals if no function triggered that
// earlier, runs AsmPrinter::doFinalization(), closes the last section and
// emits an empty .debug_loc section when debug info is present, and finally
// outputs the pending DWARF .file directives. Returns the generic printer's
// result.
// NOTE(review): listing lines 928 (the function signature) and 941 are absent
// from this extract; the name is inferred from the AsmPrinter::doFinalization
// call.
929 bool HasDebugInfo = MMI && MMI->hasDebugInfo();
930
931 // If we did not emit any functions, then the global declarations have not
932 // yet been emitted.
933 if (!GlobalsEmitted) {
934 emitGlobals(M);
935 GlobalsEmitted = true;
936 }
937
938 // call doFinalization
939 bool ret = AsmPrinter::doFinalization(M);
940
942
943 auto *TS =
944 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
945 // Close the last emitted section
946 if (HasDebugInfo) {
947 TS->closeLastSection();
948 // Emit empty .debug_loc section for better support of the empty files.
949 OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
950 }
951
952 // Output last DWARF .file directives, if any.
953 TS->outputDwarfFileDirectives();
954
955 return ret;
956}
957
958// This function emits appropriate linkage directives for
959// functions and global variables.
960//
961// extern function declaration -> .extern
962// extern function definition -> .visible
963// external global variable with init -> .visible
964// external without init -> .extern
965// appending -> not allowed, assert.
966// for any linkage other than
967// internal, private, linker_private,
968// linker_private_weak, linker_private_weak_def_auto,
969// we emit -> .weak.
970
971void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
972 raw_ostream &O) {
973 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
974 if (V->hasExternalLinkage()) {
975 if (isa<GlobalVariable>(V)) {
976 const GlobalVariable *GVar = cast<GlobalVariable>(V);
977 if (GVar) {
978 if (GVar->hasInitializer())
979 O << ".visible ";
980 else
981 O << ".extern ";
982 }
983 } else if (V->isDeclaration())
984 O << ".extern ";
985 else
986 O << ".visible ";
987 } else if (V->hasAppendingLinkage()) {
988 std::string msg;
989 msg.append("Error: ");
990 msg.append("Symbol ");
991 if (V->hasName())
992 msg.append(std::string(V->getName()));
993 msg.append("has unsupported appending linkage type");
994 llvm_unreachable(msg.c_str());
995 } else if (!V->hasInternalLinkage() &&
996 !V->hasPrivateLinkage()) {
997 O << ".weak ";
998 }
999 }
1000}
1001
1002void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1003 raw_ostream &O, bool processDemoted,
1004 const NVPTXSubtarget &STI) {
1005 // Skip meta data
1006 if (GVar->hasSection()) {
1007 if (GVar->getSection() == "llvm.metadata")
1008 return;
1009 }
1010
1011 // Skip LLVM intrinsic global variables
1012 if (GVar->getName().starts_with("llvm.") ||
1013 GVar->getName().starts_with("nvvm."))
1014 return;
1015
1016 const DataLayout &DL = getDataLayout();
1017
1018 // GlobalVariables are always constant pointers themselves.
1019 Type *ETy = GVar->getValueType();
1020
1021 if (GVar->hasExternalLinkage()) {
1022 if (GVar->hasInitializer())
1023 O << ".visible ";
1024 else
1025 O << ".extern ";
1026 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
1028 O << ".common ";
1029 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1031 GVar->hasCommonLinkage()) {
1032 O << ".weak ";
1033 }
1034
1035 if (isTexture(*GVar)) {
1036 O << ".global .texref " << getTextureName(*GVar) << ";\n";
1037 return;
1038 }
1039
1040 if (isSurface(*GVar)) {
1041 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1042 return;
1043 }
1044
1045 if (GVar->isDeclaration()) {
1046 // (extern) declarations, no definition or initializer
1047 // Currently the only known declaration is for an automatic __local
1048 // (.shared) promoted to global.
1049 emitPTXGlobalVariable(GVar, O, STI);
1050 O << ";\n";
1051 return;
1052 }
1053
1054 if (isSampler(*GVar)) {
1055 O << ".global .samplerref " << getSamplerName(*GVar);
1056
1057 const Constant *Initializer = nullptr;
1058 if (GVar->hasInitializer())
1059 Initializer = GVar->getInitializer();
1060 const ConstantInt *CI = nullptr;
1061 if (Initializer)
1062 CI = dyn_cast<ConstantInt>(Initializer);
1063 if (CI) {
1064 unsigned sample = CI->getZExtValue();
1065
1066 O << " = { ";
1067
1068 for (int i = 0,
1069 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1070 i < 3; i++) {
1071 O << "addr_mode_" << i << " = ";
1072 switch (addr) {
1073 case 0:
1074 O << "wrap";
1075 break;
1076 case 1:
1077 O << "clamp_to_border";
1078 break;
1079 case 2:
1080 O << "clamp_to_edge";
1081 break;
1082 case 3:
1083 O << "wrap";
1084 break;
1085 case 4:
1086 O << "mirror";
1087 break;
1088 }
1089 O << ", ";
1090 }
1091 O << "filter_mode = ";
1092 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1093 case 0:
1094 O << "nearest";
1095 break;
1096 case 1:
1097 O << "linear";
1098 break;
1099 case 2:
1100 llvm_unreachable("Anisotropic filtering is not supported");
1101 default:
1102 O << "nearest";
1103 break;
1104 }
1105 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1106 O << ", force_unnormalized_coords = 1";
1107 }
1108 O << " }";
1109 }
1110
1111 O << ";\n";
1112 return;
1113 }
1114
1115 if (GVar->hasPrivateLinkage()) {
1116 if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1117 return;
1118
1119 // FIXME - need better way (e.g. Metadata) to avoid generating this global
1120 if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1121 return;
1122 if (GVar->use_empty())
1123 return;
1124 }
1125
1126 const Function *demotedFunc = nullptr;
1127 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1128 O << "// " << GVar->getName() << " has been demoted\n";
1129 if (localDecls.find(demotedFunc) != localDecls.end())
1130 localDecls[demotedFunc].push_back(GVar);
1131 else {
1132 std::vector<const GlobalVariable *> temp;
1133 temp.push_back(GVar);
1134 localDecls[demotedFunc] = temp;
1135 }
1136 return;
1137 }
1138
1139 O << ".";
1140 emitPTXAddressSpace(GVar->getAddressSpace(), O);
1141
1142 if (isManaged(*GVar)) {
1143 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1145 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1146 }
1147 O << " .attribute(.managed)";
1148 }
1149
1150 if (MaybeAlign A = GVar->getAlign())
1151 O << " .align " << A->value();
1152 else
1153 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1154
1155 if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1156 (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1157 O << " .";
1158 // Special case: ABI requires that we use .u8 for predicates
1159 if (ETy->isIntegerTy(1))
1160 O << "u8";
1161 else
1162 O << getPTXFundamentalTypeStr(ETy, false);
1163 O << " ";
1164 getSymbol(GVar)->print(O, MAI);
1165
1166 // Ptx allows variable initilization only for constant and global state
1167 // spaces.
1168 if (GVar->hasInitializer()) {
1169 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1170 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1171 const Constant *Initializer = GVar->getInitializer();
1172 // 'undef' is treated as there is no value specified.
1173 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1174 O << " = ";
1175 printScalarConstant(Initializer, O);
1176 }
1177 } else {
1178 // The frontend adds zero-initializer to device and constant variables
1179 // that don't have an initial value, and UndefValue to shared
1180 // variables, so skip warning for this case.
1181 if (!GVar->getInitializer()->isNullValue() &&
1182 !isa<UndefValue>(GVar->getInitializer())) {
1183 report_fatal_error("initial value of '" + GVar->getName() +
1184 "' is not allowed in addrspace(" +
1185 Twine(GVar->getAddressSpace()) + ")");
1186 }
1187 }
1188 }
1189 } else {
1190 uint64_t ElementSize = 0;
1191
1192 // Although PTX has direct support for struct type and array type and
1193 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1194 // targets that support these high level field accesses. Structs, arrays
1195 // and vectors are lowered into arrays of bytes.
1196 switch (ETy->getTypeID()) {
1197 case Type::IntegerTyID: // Integers larger than 64 bits
1198 case Type::StructTyID:
1199 case Type::ArrayTyID:
1201 ElementSize = DL.getTypeStoreSize(ETy);
1202 // Ptx allows variable initilization only for constant and
1203 // global state spaces.
1204 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1205 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1206 GVar->hasInitializer()) {
1207 const Constant *Initializer = GVar->getInitializer();
1208 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1209 AggBuffer aggBuffer(ElementSize, *this);
1210 bufferAggregateConstant(Initializer, &aggBuffer);
1211 if (aggBuffer.numSymbols()) {
1212 unsigned int ptrSize = MAI->getCodePointerSize();
1213 if (ElementSize % ptrSize ||
1214 !aggBuffer.allSymbolsAligned(ptrSize)) {
1215 // Print in bytes and use the mask() operator for pointers.
1216 if (!STI.hasMaskOperator())
1218 "initialized packed aggregate with pointers '" +
1219 GVar->getName() +
1220 "' requires at least PTX ISA version 7.1");
1221 O << " .u8 ";
1222 getSymbol(GVar)->print(O, MAI);
1223 O << "[" << ElementSize << "] = {";
1224 aggBuffer.printBytes(O);
1225 O << "}";
1226 } else {
1227 O << " .u" << ptrSize * 8 << " ";
1228 getSymbol(GVar)->print(O, MAI);
1229 O << "[" << ElementSize / ptrSize << "] = {";
1230 aggBuffer.printWords(O);
1231 O << "}";
1232 }
1233 } else {
1234 O << " .b8 ";
1235 getSymbol(GVar)->print(O, MAI);
1236 O << "[" << ElementSize << "] = {";
1237 aggBuffer.printBytes(O);
1238 O << "}";
1239 }
1240 } else {
1241 O << " .b8 ";
1242 getSymbol(GVar)->print(O, MAI);
1243 if (ElementSize) {
1244 O << "[";
1245 O << ElementSize;
1246 O << "]";
1247 }
1248 }
1249 } else {
1250 O << " .b8 ";
1251 getSymbol(GVar)->print(O, MAI);
1252 if (ElementSize) {
1253 O << "[";
1254 O << ElementSize;
1255 O << "]";
1256 }
1257 }
1258 break;
1259 default:
1260 llvm_unreachable("type not supported yet");
1261 }
1262 }
1263 O << ";\n";
1264}
1265
1266void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1267 const Value *v = Symbols[nSym];
1268 const Value *v0 = SymbolsBeforeStripping[nSym];
1269 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1270 MCSymbol *Name = AP.getSymbol(GVar);
1271 PointerType *PTy = dyn_cast<PointerType>(v0->getType());
1272 // Is v0 a generic pointer?
1273 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1274 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1275 os << "generic(";
1276 Name->print(os, AP.MAI);
1277 os << ")";
1278 } else {
1279 Name->print(os, AP.MAI);
1280 }
1281 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1282 const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
1283 AP.printMCExpr(*Expr, os);
1284 } else
1285 llvm_unreachable("symbol type unknown");
1286}
1287
1288void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1289 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1290 // Do not emit trailing zero initializers. They will be zero-initialized by
1291 // ptxas. This saves on both space requirements for the generated PTX and on
1292 // memory use by ptxas. (See:
1293 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1294 unsigned int InitializerCount = size;
1295 // TODO: symbols make this harder, but it would still be good to trim trailing
1296 // 0s for aggs with symbols as well.
1297 if (numSymbols() == 0)
1298 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1299 InitializerCount--;
1300
1301 symbolPosInBuffer.push_back(InitializerCount);
1302 unsigned int nSym = 0;
1303 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1304 for (unsigned int pos = 0; pos < InitializerCount;) {
1305 if (pos)
1306 os << ", ";
1307 if (pos != nextSymbolPos) {
1308 os << (unsigned int)buffer[pos];
1309 ++pos;
1310 continue;
1311 }
1312 // Generate a per-byte mask() operator for the symbol, which looks like:
1313 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1314 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1315 std::string symText;
1316 llvm::raw_string_ostream oss(symText);
1317 printSymbol(nSym, oss);
1318 for (unsigned i = 0; i < ptrSize; ++i) {
1319 if (i)
1320 os << ", ";
1321 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1322 os << "(" << symText << ")";
1323 }
1324 pos += ptrSize;
1325 nextSymbolPos = symbolPosInBuffer[++nSym];
1326 assert(nextSymbolPos >= pos);
1327 }
1328}
1329
1330void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1331 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1332 symbolPosInBuffer.push_back(size);
1333 unsigned int nSym = 0;
1334 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1335 assert(nextSymbolPos % ptrSize == 0);
1336 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1337 if (pos)
1338 os << ", ";
1339 if (pos == nextSymbolPos) {
1340 printSymbol(nSym, os);
1341 nextSymbolPos = symbolPosInBuffer[++nSym];
1342 assert(nextSymbolPos % ptrSize == 0);
1343 assert(nextSymbolPos >= pos + ptrSize);
1344 } else if (ptrSize == 4)
1345 os << support::endian::read32le(&buffer[pos]);
1346 else
1347 os << support::endian::read64le(&buffer[pos]);
1348 }
1349}
1350
1351void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1352 if (localDecls.find(f) == localDecls.end())
1353 return;
1354
1355 std::vector<const GlobalVariable *> &gvars = localDecls[f];
1356
1357 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1358 const NVPTXSubtarget &STI =
1359 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1360
1361 for (const GlobalVariable *GV : gvars) {
1362 O << "\t// demoted variable\n\t";
1363 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1364 }
1365}
1366
1367void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1368 raw_ostream &O) const {
1369 switch (AddressSpace) {
1371 O << "local";
1372 break;
1374 O << "global";
1375 break;
1377 O << "const";
1378 break;
1380 O << "shared";
1381 break;
1382 default:
1383 report_fatal_error("Bad address space found while emitting PTX: " +
1385 break;
1386 }
1387}
1388
1389std::string
1390NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1391 switch (Ty->getTypeID()) {
1392 case Type::IntegerTyID: {
1393 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1394 if (NumBits == 1)
1395 return "pred";
1396 else if (NumBits <= 64) {
1397 std::string name = "u";
1398 return name + utostr(NumBits);
1399 } else {
1400 llvm_unreachable("Integer too large");
1401 break;
1402 }
1403 break;
1404 }
1405 case Type::BFloatTyID:
1406 case Type::HalfTyID:
1407 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1408 // PTX assembly.
1409 return "b16";
1410 case Type::FloatTyID:
1411 return "f32";
1412 case Type::DoubleTyID:
1413 return "f64";
1414 case Type::PointerTyID: {
1415 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1416 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1417
1418 if (PtrSize == 64)
1419 if (useB4PTR)
1420 return "b64";
1421 else
1422 return "u64";
1423 else if (useB4PTR)
1424 return "b32";
1425 else
1426 return "u32";
1427 }
1428 default:
1429 break;
1430 }
1431 llvm_unreachable("unexpected type");
1432}
1433
1434void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1435 raw_ostream &O,
1436 const NVPTXSubtarget &STI) {
1437 const DataLayout &DL = getDataLayout();
1438
1439 // GlobalVariables are always constant pointers themselves.
1440 Type *ETy = GVar->getValueType();
1441
1442 O << ".";
1443 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1444 if (isManaged(*GVar)) {
1445 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1447 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1448 }
1449 O << " .attribute(.managed)";
1450 }
1451 if (MaybeAlign A = GVar->getAlign())
1452 O << " .align " << A->value();
1453 else
1454 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1455
1456 // Special case for i128
1457 if (ETy->isIntegerTy(128)) {
1458 O << " .b8 ";
1459 getSymbol(GVar)->print(O, MAI);
1460 O << "[16]";
1461 return;
1462 }
1463
1464 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1465 O << " .";
1466 O << getPTXFundamentalTypeStr(ETy);
1467 O << " ";
1468 getSymbol(GVar)->print(O, MAI);
1469 return;
1470 }
1471
1472 int64_t ElementSize = 0;
1473
1474 // Although PTX has direct support for struct type and array type and LLVM IR
1475 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1476 // support these high level field accesses. Structs and arrays are lowered
1477 // into arrays of bytes.
1478 switch (ETy->getTypeID()) {
1479 case Type::StructTyID:
1480 case Type::ArrayTyID:
1482 ElementSize = DL.getTypeStoreSize(ETy);
1483 O << " .b8 ";
1484 getSymbol(GVar)->print(O, MAI);
1485 O << "[";
1486 if (ElementSize) {
1487 O << ElementSize;
1488 }
1489 O << "]";
1490 break;
1491 default:
1492 llvm_unreachable("type not supported yet");
1493 }
1494}
1495
1496void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1497 const DataLayout &DL = getDataLayout();
1498 const AttributeList &PAL = F->getAttributes();
1499 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1500 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1501
1503 unsigned paramIndex = 0;
1504 bool first = true;
1505 bool isKernelFunc = isKernelFunction(*F);
1506 bool isABI = (STI.getSmVersion() >= 20);
1507 bool hasImageHandles = STI.hasImageHandles();
1508
1509 if (F->arg_empty() && !F->isVarArg()) {
1510 O << "()";
1511 return;
1512 }
1513
1514 O << "(\n";
1515
1516 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1517 Type *Ty = I->getType();
1518
1519 if (!first)
1520 O << ",\n";
1521
1522 first = false;
1523
1524 // Handle image/sampler parameters
1525 if (isKernelFunction(*F)) {
1526 if (isSampler(*I) || isImage(*I)) {
1527 if (isImage(*I)) {
1528 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1529 if (hasImageHandles)
1530 O << "\t.param .u64 .ptr .surfref ";
1531 else
1532 O << "\t.param .surfref ";
1533 O << TLI->getParamName(F, paramIndex);
1534 }
1535 else { // Default image is read_only
1536 if (hasImageHandles)
1537 O << "\t.param .u64 .ptr .texref ";
1538 else
1539 O << "\t.param .texref ";
1540 O << TLI->getParamName(F, paramIndex);
1541 }
1542 } else {
1543 if (hasImageHandles)
1544 O << "\t.param .u64 .ptr .samplerref ";
1545 else
1546 O << "\t.param .samplerref ";
1547 O << TLI->getParamName(F, paramIndex);
1548 }
1549 continue;
1550 }
1551 }
1552
1553 auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
1554 paramIndex](Type *Ty) -> Align {
1555 if (MaybeAlign StackAlign =
1556 getAlign(*F, paramIndex + AttributeList::FirstArgIndex))
1557 return StackAlign.value();
1558
1559 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1560 MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
1561 return std::max(TypeAlign, ParamAlign.valueOrOne());
1562 };
1563
1564 if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1565 if (ShouldPassAsArray(Ty)) {
1566 // Just print .param .align <a> .b8 .param[size];
1567 // <a> = optimal alignment for the element type; always multiple of
1568 // PAL.getParamAlignment
1569 // size = typeallocsize of element type
1570 Align OptimalAlign = getOptimalAlignForParam(Ty);
1571
1572 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1573 O << TLI->getParamName(F, paramIndex);
1574 O << "[" << DL.getTypeAllocSize(Ty) << "]";
1575
1576 continue;
1577 }
1578 // Just a scalar
1579 auto *PTy = dyn_cast<PointerType>(Ty);
1580 unsigned PTySizeInBits = 0;
1581 if (PTy) {
1582 PTySizeInBits =
1583 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1584 assert(PTySizeInBits && "Invalid pointer size");
1585 }
1586
1587 if (isKernelFunc) {
1588 if (PTy) {
1589 // Special handling for pointer arguments to kernel
1590 O << "\t.param .u" << PTySizeInBits << " ";
1591
1592 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1593 NVPTX::CUDA) {
1594 int addrSpace = PTy->getAddressSpace();
1595 switch (addrSpace) {
1596 default:
1597 O << ".ptr ";
1598 break;
1600 O << ".ptr .const ";
1601 break;
1603 O << ".ptr .shared ";
1604 break;
1606 O << ".ptr .global ";
1607 break;
1608 }
1609 Align ParamAlign = I->getParamAlign().valueOrOne();
1610 O << ".align " << ParamAlign.value() << " ";
1611 }
1612 O << TLI->getParamName(F, paramIndex);
1613 continue;
1614 }
1615
1616 // non-pointer scalar to kernel func
1617 O << "\t.param .";
1618 // Special case: predicate operands become .u8 types
1619 if (Ty->isIntegerTy(1))
1620 O << "u8";
1621 else
1622 O << getPTXFundamentalTypeStr(Ty);
1623 O << " ";
1624 O << TLI->getParamName(F, paramIndex);
1625 continue;
1626 }
1627 // Non-kernel function, just print .param .b<size> for ABI
1628 // and .reg .b<size> for non-ABI
1629 unsigned sz = 0;
1630 if (isa<IntegerType>(Ty)) {
1631 sz = cast<IntegerType>(Ty)->getBitWidth();
1633 } else if (PTy) {
1634 assert(PTySizeInBits && "Invalid pointer size");
1635 sz = PTySizeInBits;
1636 } else
1637 sz = Ty->getPrimitiveSizeInBits();
1638 if (isABI)
1639 O << "\t.param .b" << sz << " ";
1640 else
1641 O << "\t.reg .b" << sz << " ";
1642 O << TLI->getParamName(F, paramIndex);
1643 continue;
1644 }
1645
1646 // param has byVal attribute.
1647 Type *ETy = PAL.getParamByValType(paramIndex);
1648 assert(ETy && "Param should have byval type");
1649
1650 if (isABI || isKernelFunc) {
1651 // Just print .param .align <a> .b8 .param[size];
1652 // <a> = optimal alignment for the element type; always multiple of
1653 // PAL.getParamAlignment
1654 // size = typeallocsize of element type
1655 Align OptimalAlign =
1656 isKernelFunc
1657 ? getOptimalAlignForParam(ETy)
1658 : TLI->getFunctionByValParamAlign(
1659 F, ETy, PAL.getParamAlignment(paramIndex).valueOrOne(), DL);
1660
1661 unsigned sz = DL.getTypeAllocSize(ETy);
1662 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1663 O << TLI->getParamName(F, paramIndex);
1664 O << "[" << sz << "]";
1665 continue;
1666 } else {
1667 // Split the ETy into constituent parts and
1668 // print .param .b<size> <name> for each part.
1669 // Further, if a part is vector, print the above for
1670 // each vector element.
1671 SmallVector<EVT, 16> vtparts;
1672 ComputeValueVTs(*TLI, DL, ETy, vtparts);
1673 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1674 unsigned elems = 1;
1675 EVT elemtype = vtparts[i];
1676 if (vtparts[i].isVector()) {
1677 elems = vtparts[i].getVectorNumElements();
1678 elemtype = vtparts[i].getVectorElementType();
1679 }
1680
1681 for (unsigned j = 0, je = elems; j != je; ++j) {
1682 unsigned sz = elemtype.getSizeInBits();
1683 if (elemtype.isInteger())
1685 O << "\t.reg .b" << sz << " ";
1686 O << TLI->getParamName(F, paramIndex);
1687 if (j < je - 1)
1688 O << ",\n";
1689 ++paramIndex;
1690 }
1691 if (i < e - 1)
1692 O << ",\n";
1693 }
1694 --paramIndex;
1695 continue;
1696 }
1697 }
1698
1699 if (F->isVarArg()) {
1700 if (!first)
1701 O << ",\n";
1702 O << "\t.param .align " << STI.getMaxRequiredAlignment();
1703 O << " .b8 ";
1704 O << TLI->getParamName(F, /* vararg */ -1) << "[]";
1705 }
1706
1707 O << "\n)";
1708}
1709
1710void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1711 const MachineFunction &MF) {
1712 SmallString<128> Str;
1714
1715 // Map the global virtual register number to a register class specific
1716 // virtual register number starting from 1 with that class.
1718 //unsigned numRegClasses = TRI->getNumRegClasses();
1719
1720 // Emit the Fake Stack Object
1721 const MachineFrameInfo &MFI = MF.getFrameInfo();
1722 int64_t NumBytes = MFI.getStackSize();
1723 if (NumBytes) {
1724 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1725 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1726 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1727 O << "\t.reg .b64 \t%SP;\n";
1728 O << "\t.reg .b64 \t%SPL;\n";
1729 } else {
1730 O << "\t.reg .b32 \t%SP;\n";
1731 O << "\t.reg .b32 \t%SPL;\n";
1732 }
1733 }
1734
1735 // Go through all virtual registers to establish the mapping between the
1736 // global virtual
1737 // register number and the per class virtual register number.
1738 // We use the per class virtual register number in the ptx output.
1739 unsigned int numVRs = MRI->getNumVirtRegs();
1740 for (unsigned i = 0; i < numVRs; i++) {
1742 const TargetRegisterClass *RC = MRI->getRegClass(vr);
1743 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1744 int n = regmap.size();
1745 regmap.insert(std::make_pair(vr, n + 1));
1746 }
1747
1748 // Emit register declarations
1749 // @TODO: Extract out the real register usage
1750 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1751 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1752 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1753 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1754 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1755 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1756 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1757
1758 // Emit declaration of the virtual registers or 'physical' registers for
1759 // each register class
1760 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1761 const TargetRegisterClass *RC = TRI->getRegClass(i);
1762 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1763 std::string rcname = getNVPTXRegClassName(RC);
1764 std::string rcStr = getNVPTXRegClassStr(RC);
1765 int n = regmap.size();
1766
1767 // Only declare those registers that may be used.
1768 if (n) {
1769 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1770 << ">;\n";
1771 }
1772 }
1773
1774 OutStreamer->emitRawText(O.str());
1775}
1776
1777void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1778 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1779 bool ignored;
1780 unsigned int numHex;
1781 const char *lead;
1782
1783 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1784 numHex = 8;
1785 lead = "0f";
1787 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1788 numHex = 16;
1789 lead = "0d";
1791 } else
1792 llvm_unreachable("unsupported fp type");
1793
1794 APInt API = APF.bitcastToAPInt();
1795 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1796}
1797
1798void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1799 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1800 O << CI->getValue();
1801 return;
1802 }
1803 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1804 printFPConstant(CFP, O);
1805 return;
1806 }
1807 if (isa<ConstantPointerNull>(CPV)) {
1808 O << "0";
1809 return;
1810 }
1811 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1812 bool IsNonGenericPointer = false;
1813 if (GVar->getType()->getAddressSpace() != 0) {
1814 IsNonGenericPointer = true;
1815 }
1816 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1817 O << "generic(";
1818 getSymbol(GVar)->print(O, MAI);
1819 O << ")";
1820 } else {
1821 getSymbol(GVar)->print(O, MAI);
1822 }
1823 return;
1824 }
1825 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1826 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1827 printMCExpr(*E, O);
1828 return;
1829 }
1830 llvm_unreachable("Not scalar type found in printScalarConstant()");
1831}
1832
1833void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1834 AggBuffer *AggBuffer) {
1835 const DataLayout &DL = getDataLayout();
1836 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1837 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1838 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1839 // only the space allocated by CPV.
1840 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1841 return;
1842 }
1843
1844 // Helper for filling AggBuffer with APInts.
1845 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1846 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1847 SmallVector<unsigned char, 16> Buf(NumBytes);
1848 // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the
1849 // input's bit width, and i1 arrays may not have a length that is a multuple
1850 // of 8. We handle the last byte separately, so we never request out of
1851 // bounds bits.
1852 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1853 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1854 }
1855 size_t LastBytePosition = (NumBytes - 1) * 8;
1856 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1857 Buf[NumBytes - 1] =
1858 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1859 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1860 };
1861
1862 switch (CPV->getType()->getTypeID()) {
1863 case Type::IntegerTyID:
1864 if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1865 AddIntToBuffer(CI->getValue());
1866 break;
1867 }
1868 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1869 if (const auto *CI =
1870 dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1871 AddIntToBuffer(CI->getValue());
1872 break;
1873 }
1874 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1875 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1876 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1877 AggBuffer->addZeros(AllocSize);
1878 break;
1879 }
1880 }
1881 llvm_unreachable("unsupported integer const type");
1882 break;
1883
1884 case Type::HalfTyID:
1885 case Type::BFloatTyID:
1886 case Type::FloatTyID:
1887 case Type::DoubleTyID:
1888 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1889 break;
1890
1891 case Type::PointerTyID: {
1892 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1893 AggBuffer->addSymbol(GVar, GVar);
1894 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1895 const Value *v = Cexpr->stripPointerCasts();
1896 AggBuffer->addSymbol(v, Cexpr);
1897 }
1898 AggBuffer->addZeros(AllocSize);
1899 break;
1900 }
1901
1902 case Type::ArrayTyID:
1904 case Type::StructTyID: {
1905 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1906 bufferAggregateConstant(CPV, AggBuffer);
1907 if (Bytes > AllocSize)
1908 AggBuffer->addZeros(Bytes - AllocSize);
1909 } else if (isa<ConstantAggregateZero>(CPV))
1910 AggBuffer->addZeros(Bytes);
1911 else
1912 llvm_unreachable("Unexpected Constant type");
1913 break;
1914 }
1915
1916 default:
1917 llvm_unreachable("unsupported type");
1918 }
1919}
1920
1921void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1922 AggBuffer *aggBuffer) {
1923 const DataLayout &DL = getDataLayout();
1924 int Bytes;
1925
1926 // Integers of arbitrary width
1927 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1928 APInt Val = CI->getValue();
1929 for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1930 uint8_t Byte = Val.getLoBits(8).getZExtValue();
1931 aggBuffer->addBytes(&Byte, 1, 1);
1932 Val.lshrInPlace(8);
1933 }
1934 return;
1935 }
1936
1937 // Old constants
1938 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1939 if (CPV->getNumOperands())
1940 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1941 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1942 return;
1943 }
1944
1945 if (const ConstantDataSequential *CDS =
1946 dyn_cast<ConstantDataSequential>(CPV)) {
1947 if (CDS->getNumElements())
1948 for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1949 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1950 aggBuffer);
1951 return;
1952 }
1953
1954 if (isa<ConstantStruct>(CPV)) {
1955 if (CPV->getNumOperands()) {
1956 StructType *ST = cast<StructType>(CPV->getType());
1957 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1958 if (i == (e - 1))
1959 Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1960 DL.getTypeAllocSize(ST) -
1961 DL.getStructLayout(ST)->getElementOffset(i);
1962 else
1963 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1964 DL.getStructLayout(ST)->getElementOffset(i);
1965 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1966 }
1967 }
1968 return;
1969 }
1970 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1971}
1972
1973/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1974/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1975/// expressions that are representable in PTX and create
1976/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1977const MCExpr *
1978NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1979 MCContext &Ctx = OutContext;
1980
1981 if (CV->isNullValue() || isa<UndefValue>(CV))
1982 return MCConstantExpr::create(0, Ctx);
1983
1984 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1985 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1986
1987 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1988 const MCSymbolRefExpr *Expr =
1990 if (ProcessingGeneric) {
1991 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1992 } else {
1993 return Expr;
1994 }
1995 }
1996
1997 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1998 if (!CE) {
1999 llvm_unreachable("Unknown constant value to lower!");
2000 }
2001
2002 switch (CE->getOpcode()) {
2003 default:
2004 break; // Error
2005
2006 case Instruction::AddrSpaceCast: {
2007 // Strip the addrspacecast and pass along the operand
2008 PointerType *DstTy = cast<PointerType>(CE->getType());
2009 if (DstTy->getAddressSpace() == 0)
2010 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2011
2012 break; // Error
2013 }
2014
2015 case Instruction::GetElementPtr: {
2016 const DataLayout &DL = getDataLayout();
2017
2018 // Generate a symbolic expression for the byte address
2019 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2020 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2021
2022 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2023 ProcessingGeneric);
2024 if (!OffsetAI)
2025 return Base;
2026
2027 int64_t Offset = OffsetAI.getSExtValue();
2029 Ctx);
2030 }
2031
2032 case Instruction::Trunc:
2033 // We emit the value and depend on the assembler to truncate the generated
2034 // expression properly. This is important for differences between
2035 // blockaddress labels. Since the two labels are in the same function, it
2036 // is reasonable to treat their delta as a 32-bit value.
2037 [[fallthrough]];
2038 case Instruction::BitCast:
2039 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2040
2041 case Instruction::IntToPtr: {
2042 const DataLayout &DL = getDataLayout();
2043
2044 // Handle casts to pointers by changing them into casts to the appropriate
2045 // integer type. This promotes constant folding and simplifies this code.
2046 Constant *Op = CE->getOperand(0);
2047 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2048 /*IsSigned*/ false, DL);
2049 if (Op)
2050 return lowerConstantForGV(Op, ProcessingGeneric);
2051
2052 break; // Error
2053 }
2054
2055 case Instruction::PtrToInt: {
2056 const DataLayout &DL = getDataLayout();
2057
2058 // Support only foldable casts to/from pointers that can be eliminated by
2059 // changing the pointer to the appropriately sized integer type.
2060 Constant *Op = CE->getOperand(0);
2061 Type *Ty = CE->getType();
2062
2063 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2064
2065 // We can emit the pointer value into this slot if the slot is an
2066 // integer slot equal to the size of the pointer.
2067 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2068 return OpExpr;
2069
2070 // Otherwise the pointer is smaller than the resultant integer, mask off
2071 // the high bits so we are sure to get a proper truncation if the input is
2072 // a constant expr.
2073 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2074 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2075 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2076 }
2077
2078 // The MC library also has a right-shift operator, but it isn't consistently
2079 // signed or unsigned between different targets.
2080 case Instruction::Add: {
2081 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2082 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2083 switch (CE->getOpcode()) {
2084 default: llvm_unreachable("Unknown binary operator constant cast expr");
2085 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2086 }
2087 }
2088 }
2089
2090 // If the code isn't optimized, there may be outstanding folding
2091 // opportunities. Attempt to fold the expression using DataLayout as a
2092 // last resort before giving up.
2094 if (C != CE)
2095 return lowerConstantForGV(C, ProcessingGeneric);
2096
2097 // Otherwise report the problem to the user.
2098 std::string S;
2100 OS << "Unsupported expression in static initializer: ";
2101 CE->printAsOperand(OS, /*PrintType=*/false,
2102 !MF ? nullptr : MF->getFunction().getParent());
2103 report_fatal_error(Twine(OS.str()));
2104}
2105
2106// Copy of MCExpr::print customized for NVPTX
2107void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2108 switch (Expr.getKind()) {
2109 case MCExpr::Target:
2110 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2111 case MCExpr::Constant:
2112 OS << cast<MCConstantExpr>(Expr).getValue();
2113 return;
2114
2115 case MCExpr::SymbolRef: {
2116 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2117 const MCSymbol &Sym = SRE.getSymbol();
2118 Sym.print(OS, MAI);
2119 return;
2120 }
2121
2122 case MCExpr::Unary: {
2123 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2124 switch (UE.getOpcode()) {
2125 case MCUnaryExpr::LNot: OS << '!'; break;
2126 case MCUnaryExpr::Minus: OS << '-'; break;
2127 case MCUnaryExpr::Not: OS << '~'; break;
2128 case MCUnaryExpr::Plus: OS << '+'; break;
2129 }
2130 printMCExpr(*UE.getSubExpr(), OS);
2131 return;
2132 }
2133
2134 case MCExpr::Binary: {
2135 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2136
2137 // Only print parens around the LHS if it is non-trivial.
2138 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2139 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2140 printMCExpr(*BE.getLHS(), OS);
2141 } else {
2142 OS << '(';
2143 printMCExpr(*BE.getLHS(), OS);
2144 OS<< ')';
2145 }
2146
2147 switch (BE.getOpcode()) {
2148 case MCBinaryExpr::Add:
2149 // Print "X-42" instead of "X+-42".
2150 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2151 if (RHSC->getValue() < 0) {
2152 OS << RHSC->getValue();
2153 return;
2154 }
2155 }
2156
2157 OS << '+';
2158 break;
2159 default: llvm_unreachable("Unhandled binary operator");
2160 }
2161
2162 // Only print parens around the LHS if it is non-trivial.
2163 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2164 printMCExpr(*BE.getRHS(), OS);
2165 } else {
2166 OS << '(';
2167 printMCExpr(*BE.getRHS(), OS);
2168 OS << ')';
2169 }
2170 return;
2171 }
2172 }
2173
2174 llvm_unreachable("Invalid expression kind!");
2175}
2176
2177/// PrintAsmOperand - Print out an operand for an inline asm expression.
2178///
2179bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2180 const char *ExtraCode, raw_ostream &O) {
2181 if (ExtraCode && ExtraCode[0]) {
2182 if (ExtraCode[1] != 0)
2183 return true; // Unknown modifier.
2184
2185 switch (ExtraCode[0]) {
2186 default:
2187 // See if this is a generic print operand
2188 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2189 case 'r':
2190 break;
2191 }
2192 }
2193
2194 printOperand(MI, OpNo, O);
2195
2196 return false;
2197}
2198
2199bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2200 unsigned OpNo,
2201 const char *ExtraCode,
2202 raw_ostream &O) {
2203 if (ExtraCode && ExtraCode[0])
2204 return true; // Unknown modifier
2205
2206 O << '[';
2207 printMemOperand(MI, OpNo, O);
2208 O << ']';
2209
2210 return false;
2211}
2212
// Prints operand OpNum of MI to O, dispatching on the operand's type; an
// operand type without a matching case reaches llvm_unreachable.
//
// NOTE(review): the embedded listing numbers skip 2217, 2220, 2222, 2228,
// 2232, 2236 and 2240, so the `case` labels and the two statements of the
// physical-register branch are not present in this text. Restore them from
// the upstream file before relying on this copy.
2213void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
2214 raw_ostream &O) {
2215 const MachineOperand &MO = MI->getOperand(OpNum);
2216 switch (MO.getType()) {
 // NOTE(review): listing line 2217 is missing; presumably the case label for register operands - confirm.
2218 if (MO.getReg().isPhysical()) {
2219 if (MO.getReg() == NVPTX::VRDepot)
 // NOTE(review): listing line 2220 (statement for the VRDepot register) is missing.
2221 else
 // NOTE(review): listing line 2222 (statement for other physical registers) is missing.
2223 } else {
 // Non-physical registers are printed by emitVirtualRegister.
2224 emitVirtualRegister(MO.getReg(), O);
2225 }
2226 break;
2227
 // NOTE(review): listing line 2228 is missing; presumably the case label for immediate operands - confirm.
2229 O << MO.getImm();
2230 break;
2231
 // NOTE(review): listing line 2232 is missing; presumably the case label for floating-point immediates - confirm.
2233 printFPConstant(MO.getFPImm(), O);
2234 break;
2235
 // NOTE(review): listing line 2236 is missing; presumably the case label for symbol-like operands - confirm.
2237 PrintSymbolOperand(MO, O);
2238 break;
2239
 // NOTE(review): listing line 2240 is missing; presumably the case label for basic-block operands - confirm.
 // The block is printed as its MCSymbol.
2241 MO.getMBB()->getSymbol()->print(O, MAI);
2242 break;
2243
2244 default:
2245 llvm_unreachable("Operand type not supported.");
2246 }
2247}
2248
2249void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
2250 raw_ostream &O, const char *Modifier) {
2251 printOperand(MI, OpNum, O);
2252
2253 if (Modifier && strcmp(Modifier, "add") == 0) {
2254 O << ", ";
2255 printOperand(MI, OpNum + 1, O);
2256 } else {
2257 if (MI->getOperand(OpNum + 1).isImm() &&
2258 MI->getOperand(OpNum + 1).getImm() == 0)
2259 return; // don't print ',0' or '+0'
2260 O << "+";
2261 printOperand(MI, OpNum + 1, O);
2262 }
2263}
2264
2265// Force static initialization.
2269}
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:135
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Looks at all the uses of the given value Returns the Liveness deduced from the uses of this value Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses If the result is MaybeLiveUses might be modified but its content should be ignored(since it might not be complete). DeadArgumentEliminationPass
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
Symbol * Sym
Definition: ELF_riscv.cpp:479
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
static bool isEmptyXXStructor(GlobalVariable *GV)
#define DEPOTNAME
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
static bool usedInGlobalVarDef(const Constant *C)
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
static cl::opt< bool > LowerCtorDtor("nvptx-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(false), cl::Hidden)
static bool ShouldPassAsArray(Type *Ty)
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const char * name
Definition: SMEABIPass.cpp:49
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5282
APInt bitcastToAPInt() const
Definition: APFloat.h:1254
Class for arbitrary precision integers.
Definition: APInt.h:77
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1499
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:837
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:704
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:419
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:88
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:91
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:103
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:450
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:395
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:122
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:106
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:95
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCSymbol * GetExternalSymbolSymbol(Twine Sym) const
Return the MCSymbol for the specified ExternalSymbol.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:396
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:100
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:403
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:423
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:414
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:208
ConstantArray - Constant Array Declarations.
Definition: Constants.h:424
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:584
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1084
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APFloat & getValueAPF() const
Definition: Constants.h:312
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This is an important base class in LLVM.
Definition: Constant.h:41
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Subprogram description.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1830
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:578
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:118
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:110
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:514
bool hasExternalLinkage() const
Definition: GlobalValue.h:510
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:286
bool hasLocalLinkage() const
Definition: GlobalValue.h:527
bool hasPrivateLinkage() const
Definition: GlobalValue.h:526
unsigned getAddressSpace() const
Definition: GlobalValue.h:204
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:293
bool hasWeakLinkage() const
Definition: GlobalValue.h:521
bool hasCommonLinkage() const
Definition: GlobalValue.h:531
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:511
Type * getValueType() const
Definition: GlobalValue.h:295
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
Definition: MCAsmInfo.h:546
Binary assembler expressions.
Definition: MCExpr.h:492
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:639
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:642
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:541
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:536
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:636
@ Add
Addition.
Definition: MCExpr.h:495
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
Context object for machine code objects.
Definition: MCContext.h:83
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:212
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
@ Unary
Unary expressions.
Definition: MCExpr.h:41
@ Constant
Constant expressions.
Definition: MCExpr.h:39
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
@ Target
Target specific expression.
Definition: MCExpr.h:42
@ Binary
Binary expressions.
Definition: MCExpr.h:38
ExprKind getKind() const
Definition: MCExpr.h:81
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
const MCSymbol & getSymbol() const
Definition: MCExpr.h:410
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:58
Unary assembler expressions.
Definition: MCExpr.h:436
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:479
@ Minus
Unary minus.
Definition: MCExpr.h:440
@ Plus
Unary plus.
Definition: MCExpr.h:442
@ Not
Bitwise negation.
Definition: MCExpr.h:441
@ LNot
Logical negation.
Definition: MCExpr.h:439
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:482
Metadata node.
Definition: Metadata.h:1067
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
bool hasDebugInfo() const
Returns true if valid debug info is present.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:59
static const char * getRegisterName(MCRegister Reg)
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
bool hasImageHandles() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
UniqueStringSaver & getStrPool() const
Implements NVPTX-specific streamer.
void closeLastSection()
Close last section.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:679
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:258
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Class to represent struct types.
Definition: DerivedTypes.h:216
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
unsigned getPointerSizeInBits(unsigned AS) const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:146
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition: Type.h:75
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ VoidTyID
type with no size
Definition: Type.h:63
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ StructTyID
Structures.
Definition: Type.h:74
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition: Type.h:57
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ PointerTyID
Pointers.
Definition: Type.h:73
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:295
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:243
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
StringRef save(const char *S)
Definition: StringSaver.h:52
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool erase(const ValueT &V)
Definition: DenseSet.h:101
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ NVCL
Definition: NVPTX.h:74
@ CUDA
Definition: NVPTX.h:75
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:31
uint64_t read64le(const void *P)
Definition: Endian.h:428
uint32_t read32le(const void *P)
Definition: Endian.h:425
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
std::optional< unsigned > getMaxNTIDy(const Function &F)
std::string getSamplerName(const Value &val)
AddressSpace
Definition: NVPTXBaseInfo.h:21
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
bool getMinCTASm(const Function &F, unsigned &x)
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isImage(const Value &val)
std::optional< unsigned > getMaxNTIDz(const Function &F)
MaybeAlign getAlign(const Function &F, unsigned Index)
std::optional< unsigned > getMaxNTIDx(const Function &F)
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
bool isManaged(const Value &val)
unsigned promoteScalarArgumentSize(unsigned size)
bool isSurface(const Value &val)
void clearAnnotationCache(const Module *Mod)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
std::string getSurfaceName(const Value &val)
std::optional< unsigned > getReqNTIDy(const Function &F)
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:200
bool getMaxNReg(const Function &F, unsigned &x)
bool isTexture(const Value &val)
bool isImageWriteOnly(const Value &val)
bool isImageReadWrite(const Value &val)
void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
std::string getTextureName(const Value &val)
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
std::optional< unsigned > getReqNTIDz(const Function &F)
std::optional< unsigned > getReqNTIDx(const Function &F)
bool isSampler(const Value &val)
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool getMaxClusterRank(const Function &F, unsigned &x)
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Target & getTheNVPTXTarget32()
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:271
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:246
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:272
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...