LLVM 19.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/DenseSet.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/Twine.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/BasicBlock.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/Constants.h"
54#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/DebugLoc.h"
59#include "llvm/IR/Function.h"
60#include "llvm/IR/GlobalAlias.h"
61#include "llvm/IR/GlobalValue.h"
63#include "llvm/IR/Instruction.h"
64#include "llvm/IR/LLVMContext.h"
65#include "llvm/IR/Module.h"
66#include "llvm/IR/Operator.h"
67#include "llvm/IR/Type.h"
68#include "llvm/IR/User.h"
69#include "llvm/MC/MCExpr.h"
70#include "llvm/MC/MCInst.h"
71#include "llvm/MC/MCInstrDesc.h"
72#include "llvm/MC/MCStreamer.h"
73#include "llvm/MC/MCSymbol.h"
78#include "llvm/Support/Endian.h"
81#include "llvm/Support/Path.h"
87#include <cassert>
88#include <cstdint>
89#include <cstring>
90#include <new>
91#include <string>
92#include <utility>
93#include <vector>
94
95using namespace llvm;
96
// Hidden boolean command-line option "nvptx-lower-global-ctor-dtor"; it
// defaults to false. In this file it is read where the module's
// llvm.global_ctors / llvm.global_dtors lists are checked for being non-empty.
97static cl::opt<bool>
98 LowerCtorDtor("nvptx-lower-global-ctor-dtor",
99 cl::desc("Lower GPU ctor / dtors to globals on the device."),
100 cl::init(false), cl::Hidden);
101
102#define DEPOTNAME "__local_depot"
103
104/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
105/// depends.
///
/// The result is accumulated into \p Globals rather than returned. A
/// GlobalVariable is recorded as-is and its own operands are not followed
/// here; any other User is searched recursively through all of its operands.
/// Values that are neither are ignored.
/// NOTE(review): listing lines 107-108 (the parameter list) are missing from
/// this copy of the file.
106static void
109 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
110 Globals.insert(GV);
111 else {
112 if (const User *U = dyn_cast<User>(V)) {
113 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
114 DiscoverDependentGlobals(U->getOperand(i), Globals);
115 }
116 }
117 }
118}
119
120/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
121/// instances to be emitted, but only after any dependents have been added
122/// first.
///
/// Visited holds globals that are already in Order; Visiting holds globals
/// whose dependencies are currently being walked. Meeting a global that is
/// still in Visiting means the dependency graph has a cycle, which is a fatal
/// error.
/// NOTE(review): listing lines 124-127 (the parameter list) and 137 (the
/// declaration of Others) are missing from this copy of the file.
123static void
128 // Have we already visited this one?
129 if (Visited.count(GV))
130 return;
131
132 // Do we have a circular dependency?
133 if (!Visiting.insert(GV).second)
134 report_fatal_error("Circular dependency found in global variable set");
135
136 // Make sure we visit all dependents first
138 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
139 DiscoverDependentGlobals(GV->getOperand(i), Others);
140
  // Note: the loop variable below shadows the function's GV inside the loop.
141 for (const GlobalVariable *GV : Others)
142 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
143
144 // Now we can visit ourself
145 Order.push_back(GV);
146 Visited.insert(GV);
147 Visiting.erase(GV);
148}
149
// Checks MI's opcode against the subtarget's feature bits and lowers MI into
// an MCInst.
// NOTE(review): listing line 156 is missing from this copy; presumably it
// hands Inst to the output streamer -- confirm against the upstream file.
150void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
151 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
152 getSubtargetInfo().getFeatureBits());
153
154 MCInst Inst;
155 lowerToMCInst(MI, Inst);
157}
158
159// Handle symbol backtracking for targets that do not support image handles
160bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
161 unsigned OpNo, MCOperand &MCOp) {
162 const MachineOperand &MO = MI->getOperand(OpNo);
163 const MCInstrDesc &MCID = MI->getDesc();
164
165 if (MCID.TSFlags & NVPTXII::IsTexFlag) {
166 // This is a texture fetch, so operand 4 is a texref and operand 5 is
167 // a samplerref
168 if (OpNo == 4 && MO.isImm()) {
169 lowerImageHandleSymbol(MO.getImm(), MCOp);
170 return true;
171 }
172 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
173 lowerImageHandleSymbol(MO.getImm(), MCOp);
174 return true;
175 }
176
177 return false;
178 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
179 unsigned VecSize =
180 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
181
182 // For a surface load of vector size N, the Nth operand will be the surfref
183 if (OpNo == VecSize && MO.isImm()) {
184 lowerImageHandleSymbol(MO.getImm(), MCOp);
185 return true;
186 }
187
188 return false;
189 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
190 // This is a surface store, so operand 0 is a surfref
191 if (OpNo == 0 && MO.isImm()) {
192 lowerImageHandleSymbol(MO.getImm(), MCOp);
193 return true;
194 }
195
196 return false;
197 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
198 // This is a query, so operand 1 is a surfref/texref
199 if (OpNo == 1 && MO.isImm()) {
200 lowerImageHandleSymbol(MO.getImm(), MCOp);
201 return true;
202 }
203
204 return false;
205 }
206
207 return false;
208}
209
// Stores in MCOp a symbol reference for image handle number Index. The name
// comes from MFI->getImageHandleSymbol(Index), is saved into the target
// machine's string pool, and is then used to get or create the MCSymbol.
// NOTE(review): listing lines 212 and 214 are missing from this copy, so the
// declaration of MFI is not visible here.
210void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
211 // Ewwww
213 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
215 const char *Sym = MFI->getImageHandleSymbol(Index);
216 StringRef SymName = nvTM.getStrPool().save(Sym);
217 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
218}
219
// Builds OutMI from MI: copies the opcode, then lowers the operands in order.
// CALL_PROTOTYPE is special-cased and gets a single symbol operand built from
// operand 0. For all other opcodes, when the subtarget has no image handles,
// each operand is first offered to lowerImageHandleOperand; operands it does
// not claim go through lowerOperand.
// NOTE(review): listing line 226 (the argument of the GetSymbolRef call) is
// missing from this copy of the file.
220void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
221 OutMI.setOpcode(MI->getOpcode());
222 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
223 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
224 const MachineOperand &MO = MI->getOperand(0);
225 OutMI.addOperand(GetSymbolRef(
227 return;
228 }
229
230 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
231 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
232 const MachineOperand &MO = MI->getOperand(i);
233
234 MCOperand MCOp;
235 if (!STI.hasImageHandles()) {
236 if (lowerImageHandleOperand(MI, i, MCOp)) {
237 OutMI.addOperand(MCOp);
238 continue;
239 }
240 }
241
242 if (lowerOperand(MO, MCOp))
243 OutMI.addOperand(MCOp);
244 }
245}
246
// Translates one MachineOperand into MCOp, switching on the operand's type.
// Every path that is visible here ends by returning true; an operand type
// with no case reaches llvm_unreachable, and a floating-point immediate of an
// unhandled type is a fatal error.
// NOTE(review): the `case` labels of the outer switch and the bodies of the
// four floating-point cases (listing lines 251, 254, 257-258, 261, 264, 267,
// 274-275, 278-279, 282-283, 286-287) are missing from this copy. Which
// operand type each branch belongs to therefore has to be confirmed against
// the upstream file.
247bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
248 MCOperand &MCOp) {
249 switch (MO.getType()) {
250 default: llvm_unreachable("unknown operand type");
  // Register: the register is passed through encodeVirtualRegister.
252 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
253 break;
  // Integer immediate.
255 MCOp = MCOperand::createImm(MO.getImm());
256 break;
  // Basic-block reference: built from the block's symbol.
259 MO.getMBB()->getSymbol(), OutContext));
260 break;
  // External symbol, referenced by name.
262 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
263 break;
  // Global value.
265 MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
266 break;
  // Floating-point immediate: handled per IR type (half, bfloat, float,
  // double).
268 const ConstantFP *Cnt = MO.getFPImm();
269 const APFloat &Val = Cnt->getValueAPF();
270
271 switch (Cnt->getType()->getTypeID()) {
272 default: report_fatal_error("Unsupported FP type"); break;
273 case Type::HalfTyID:
276 break;
277 case Type::BFloatTyID:
280 break;
281 case Type::FloatTyID:
284 break;
285 case Type::DoubleTyID:
288 break;
289 }
290 break;
291 }
292 }
293 return true;
294}
295
// Packs a register into one 32-bit value.
//   * First branch: bits 28-31 hold a code for the register class (1..6, see
//     the chain below) and bits 0-27 hold the number that VRegMapping assigns
//     to Reg within that class. An unknown class is a fatal error.
//   * Else branch: class code 0 and the low 28 bits of Reg itself.
// NOTE(review): listing line 297 -- the `if` that selects between the two
// branches -- is missing from this copy. Going by the comment in the else
// branch it tests whether Reg is a virtual register; confirm upstream.
296unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
298 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
299
300 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
301 unsigned RegNum = RegMap[Reg];
302
303 // Encode the register class in the upper 4 bits
304 // Must be kept in sync with NVPTXInstPrinter::printRegName
305 unsigned Ret = 0;
306 if (RC == &NVPTX::Int1RegsRegClass) {
307 Ret = (1 << 28);
308 } else if (RC == &NVPTX::Int16RegsRegClass) {
309 Ret = (2 << 28);
310 } else if (RC == &NVPTX::Int32RegsRegClass) {
311 Ret = (3 << 28);
312 } else if (RC == &NVPTX::Int64RegsRegClass) {
313 Ret = (4 << 28);
314 } else if (RC == &NVPTX::Float32RegsRegClass) {
315 Ret = (5 << 28);
316 } else if (RC == &NVPTX::Float64RegsRegClass) {
317 Ret = (6 << 28);
318 } else {
319 report_fatal_error("Bad register class");
320 }
321
322 // Insert the vreg number
323 Ret |= (RegNum & 0x0FFFFFFF);
324 return Ret;
325 } else {
326 // Some special-use registers are actually physical registers.
327 // Encode this as the register class ID of 0 and the real register ID.
328 return Reg & 0x0FFFFFFF;
329 }
330}
331
// Wraps Symbol in an expression operand and returns it.
// NOTE(review): listing line 334, which creates Expr from Symbol, is missing
// from this copy of the file.
332MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
333 const MCExpr *Expr;
335 OutContext);
336 return MCOperand::createExpr(Expr);
337}
338
339static bool ShouldPassAsArray(Type *Ty) {
340 return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) ||
341 Ty->isHalfTy() || Ty->isBFloatTy();
342}
343
// Writes the return-value declaration of F to O, wrapped in " (" ... ") ".
// Nothing is written for a void return type.
//
// When isABI is true (sm_20 and newer):
//   * integer / floating-point scalars that are not passed as an array:
//       .param .b<size> func_retval0
//   * pointers:
//       .param .b<pointer size in bits> func_retval0
//   * types for which ShouldPassAsArray is true:
//       .param .align <A> .b8 func_retval0[<alloc size>]
// Otherwise the type is split into value types and one
// ".reg .b<sz> func_retval<idx>" is written per scalar element, separated by
// ", ".
//
// NOTE(review): listing lines 346 (declaration of STI), 365 and 367 (size
// computation for the floating-point case and what follows it), 375 (the
// arguments of getFunctionArgumentAlignment) and 395 (the statement guarded
// by elemtype.isInteger()) are missing from this copy of the file.
344void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
345 const DataLayout &DL = getDataLayout();
347 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
348
349 Type *Ty = F->getReturnType();
350
351 bool isABI = (STI.getSmVersion() >= 20);
352
353 if (Ty->getTypeID() == Type::VoidTyID)
354 return;
355 O << " (";
356
357 if (isABI) {
358 if ((Ty->isFloatingPointTy() || Ty->isIntegerTy()) &&
359 !ShouldPassAsArray(Ty)) {
360 unsigned size = 0;
361 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
362 size = ITy->getBitWidth();
363 } else {
364 assert(Ty->isFloatingPointTy() && "Floating point type expected here");
366 }
368 O << ".param .b" << size << " func_retval0";
369 } else if (isa<PointerType>(Ty)) {
370 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
371 << " func_retval0";
372 } else if (ShouldPassAsArray(Ty)) {
373 unsigned totalsz = DL.getTypeAllocSize(Ty);
374 Align RetAlignment = TLI->getFunctionArgumentAlignment(
376 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
377 << totalsz << "]";
378 } else
379 llvm_unreachable("Unknown return type");
380 } else {
381 SmallVector<EVT, 16> vtparts;
382 ComputeValueVTs(*TLI, DL, Ty, vtparts);
  // idx numbers the emitted registers across all parts and vector elements.
383 unsigned idx = 0;
384 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
385 unsigned elems = 1;
386 EVT elemtype = vtparts[i];
387 if (vtparts[i].isVector()) {
388 elems = vtparts[i].getVectorNumElements();
389 elemtype = vtparts[i].getVectorElementType();
390 }
391
392 for (unsigned j = 0, je = elems; j != je; ++j) {
393 unsigned sz = elemtype.getSizeInBits();
394 if (elemtype.isInteger())
396 O << ".reg .b" << sz << " func_retval" << idx;
397 if (j < je - 1)
398 O << ", ";
399 ++idx;
400 }
401 if (i < e - 1)
402 O << ", ";
403 }
404 }
405 O << ") ";
406}
407
408void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
409 raw_ostream &O) {
410 const Function &F = MF.getFunction();
411 printReturnValStr(&F, O);
412}
413
414// Return true if MBB is the header of a loop marked with
415// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
416bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
417 const MachineBasicBlock &MBB) const {
418 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
419 // We insert .pragma "nounroll" only to the loop header.
420 if (!LI.isLoopHeader(&MBB))
421 return false;
422
423 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
424 // we iterate through each back edge of the loop with header MBB, and check
425 // whether its metadata contains llvm.loop.unroll.disable.
426 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
427 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
428 // Edges from other loops to MBB are not back edges.
429 continue;
430 }
431 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
432 if (MDNode *LoopID =
433 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
434 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
435 return true;
436 if (MDNode *UnrollCountMD =
437 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
438 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
439 ->isOne())
440 return true;
441 }
442 }
443 }
444 }
445 return false;
446}
447
// Emits a `.pragma "nounroll";` line when MBB is the header of a loop that is
// marked as not to be unrolled (see isLoopHeaderOfNoUnroll).
// NOTE(review): listing line 449 is missing from this copy; presumably it
// forwards to the base-class implementation -- confirm upstream.
448void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
450 if (isLoopHeaderOfNoUnroll(MBB))
451 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
452}
453
// Emits everything that precedes a function body:
//   1. the module-level globals, if this is the first function emitted;
//   2. the linkage directive, then ".entry " for kernels or ".func " plus the
//      return-value declaration for other functions;
//   3. the parameter list and, for kernels, the kernel directives;
//   4. the opening brace and the virtual-register declarations.
// NOTE(review): listing lines 455-456 (declaration of the stream O), 474,
// 482 (the condition guarding ".noreturn") and 495 (the statement that emits
// the initial .loc) are missing from this copy of the file.
454void NVPTXAsmPrinter::emitFunctionEntryLabel() {
457
458 if (!GlobalsEmitted) {
459 emitGlobals(*MF->getFunction().getParent());
460 GlobalsEmitted = true;
461 }
462
463 // Set up
464 MRI = &MF->getRegInfo();
465 F = &MF->getFunction();
466 emitLinkageDirective(F, O);
467 if (isKernelFunction(*F))
468 O << ".entry ";
469 else {
470 O << ".func ";
471 printReturnValStr(*MF, O);
472 }
473
475
476 emitFunctionParamList(F, O);
477 O << "\n";
478
479 if (isKernelFunction(*F))
480 emitKernelFunctionDirectives(*F, O);
481
483 O << ".noreturn";
484
485 OutStreamer->emitRawText(O.str());
486
  // Start this function with an empty virtual-register mapping.
487 VRegMapping.clear();
488 // Emit open brace for function body.
489 OutStreamer->emitRawText(StringRef("{\n"));
490 setAndEmitFunctionVirtualRegisters(*MF);
491 // Emit initial .loc debug directive for correct relocation symbol data.
492 if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
493 assert(SP->getUnit());
494 if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo())
496 }
497}
498
// NOTE(review): the signature line (listing line 499) is missing from this
// copy; judging by the call below this is the body of
// NVPTXAsmPrinter::runOnMachineFunction -- confirm upstream.
// Runs the base-class implementation, then emits the closing "}" of the
// function body, and returns the base-class result.
500 bool Result = AsmPrinter::runOnMachineFunction(F);
501 // Emit closing brace for the body of function F.
502 // The closing brace must be emitted here because we need to emit additional
503 // debug labels/data after the last basic block.
504 // We need to emit the closing brace here because we don't have function that
505 // finished emission of the function body.
506 OutStreamer->emitRawText(StringRef("}\n"));
507 return Result;
508}
509
// Emits, at the top of the function body, whatever emitDemotedVars writes for
// the current function.
// NOTE(review): listing line 511 (the declaration of the buffer Str) is
// missing from this copy of the file.
510void NVPTXAsmPrinter::emitFunctionBodyStart() {
512 raw_svector_ostream O(Str);
513 emitDemotedVars(&MF->getFunction(), O);
514 OutStreamer->emitRawText(O.str());
515}
516
517void NVPTXAsmPrinter::emitFunctionBodyEnd() {
518 VRegMapping.clear();
519}
520
524 return OutContext.getOrCreateSymbol(Str);
525}
526
// Emits an "implicit-def: <register>" comment for operand 0 of MI, followed
// by a blank line. Physical registers are named through the subtarget's
// register info.
// NOTE(review): listing line 531 (the name used for a virtual register) is
// missing from this copy of the file.
527void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
528 Register RegNo = MI->getOperand(0).getReg();
529 if (RegNo.isVirtual()) {
530 OutStreamer->AddComment(Twine("implicit-def: ") +
532 } else {
533 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
534 OutStreamer->AddComment(Twine("implicit-def: ") +
535 STI.getRegisterInfo()->getName(RegNo));
536 }
537 OutStreamer->addBlankLine();
538}
539
540void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
541 raw_ostream &O) const {
542 // If the NVVM IR has some of reqntid* specified, then output
543 // the reqntid directive, and set the unspecified ones to 1.
544 // If none of Reqntid* is specified, don't output reqntid directive.
545 unsigned Reqntidx, Reqntidy, Reqntidz;
546 Reqntidx = Reqntidy = Reqntidz = 1;
547 bool ReqSpecified = false;
548 ReqSpecified |= getReqNTIDx(F, Reqntidx);
549 ReqSpecified |= getReqNTIDy(F, Reqntidy);
550 ReqSpecified |= getReqNTIDz(F, Reqntidz);
551
552 if (ReqSpecified)
553 O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz
554 << "\n";
555
556 // If the NVVM IR has some of maxntid* specified, then output
557 // the maxntid directive, and set the unspecified ones to 1.
558 // If none of maxntid* is specified, don't output maxntid directive.
559 unsigned Maxntidx, Maxntidy, Maxntidz;
560 Maxntidx = Maxntidy = Maxntidz = 1;
561 bool MaxSpecified = false;
562 MaxSpecified |= getMaxNTIDx(F, Maxntidx);
563 MaxSpecified |= getMaxNTIDy(F, Maxntidy);
564 MaxSpecified |= getMaxNTIDz(F, Maxntidz);
565
566 if (MaxSpecified)
567 O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz
568 << "\n";
569
570 unsigned Mincta = 0;
571 if (getMinCTASm(F, Mincta))
572 O << ".minnctapersm " << Mincta << "\n";
573
574 unsigned Maxnreg = 0;
575 if (getMaxNReg(F, Maxnreg))
576 O << ".maxnreg " << Maxnreg << "\n";
577
578 // .maxclusterrank directive requires SM_90 or higher, make sure that we
579 // filter it out for lower SM versions, as it causes a hard ptxas crash.
580 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
581 const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
582 unsigned Maxclusterrank = 0;
583 if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90)
584 O << ".maxclusterrank " << Maxclusterrank << "\n";
585}
586
587std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
588 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
589
590 std::string Name;
591 raw_string_ostream NameStr(Name);
592
593 VRegRCMap::const_iterator I = VRegMapping.find(RC);
594 assert(I != VRegMapping.end() && "Bad register class");
595 const DenseMap<unsigned, unsigned> &RegMap = I->second;
596
597 VRegMap::const_iterator VI = RegMap.find(Reg);
598 assert(VI != RegMap.end() && "Bad virtual register");
599 unsigned MappedVR = VI->second;
600
601 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
602
603 NameStr.flush();
604 return Name;
605}
606
607void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
608 raw_ostream &O) {
609 O << getVirtualRegisterName(vr);
610}
611
// Emits a declaration for alias GA, using GA's own symbol and the signature
// of the function it aliases. The aliasee must be a non-kernel function
// definition, and a weak alias is rejected; both violations are reported
// with the messages below.
// NOTE(review): listing lines 616 (the call that opens the first diagnostic)
// and 620 (the rest of the linkage condition) are missing from this copy.
612void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
613 raw_ostream &O) {
614 const Function *F = dyn_cast_or_null<Function>(GA->getAliaseeObject());
615 if (!F || isKernelFunction(*F) || F->isDeclaration())
617 "NVPTX aliasee must be a non-kernel function definition");
618
619 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
621 report_fatal_error("NVPTX aliasee must not be '.weak'");
622
623 emitDeclarationWithName(F, getSymbol(GA), O);
624}
625
626void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
627 emitDeclarationWithName(F, getSymbol(F), O);
628}
629
// Writes a declaration of F to O, printing symbol S as the name. In order:
// linkage directive, ".entry " or ".func ", return-value declaration, the
// name, the parameter list, and a terminating ";".
// NOTE(review): listing line 642 (the condition guarding ".noreturn") is
// missing from this copy of the file.
630void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
631 raw_ostream &O) {
632 emitLinkageDirective(F, O);
633 if (isKernelFunction(*F))
634 O << ".entry ";
635 else
636 O << ".func ";
637 printReturnValStr(F, O);
638 S->print(O, MAI);
639 O << "\n";
640 emitFunctionParamList(F, O);
641 O << "\n";
643 O << ".noreturn";
644 O << ";\n";
645}
646
// Returns false for a null C. If C is itself a GlobalVariable, the result is
// true unless that variable is named "llvm.used". Otherwise the Constant
// users of C are examined and true is returned as soon as one satisfies the
// check on listing line 657.
// NOTE(review): listing line 657 is missing from this copy; presumably it is
// a recursive call on the user -- confirm upstream.
647static bool usedInGlobalVarDef(const Constant *C) {
648 if (!C)
649 return false;
650
651 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
652 return GV->getName() != "llvm.used";
653 }
654
655 for (const User *U : C->users())
  // Note: this C shadows the parameter of the same name.
656 if (const Constant *C = dyn_cast<Constant>(U))
658 return true;
659
660 return false;
661}
662
663static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
664 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
665 if (othergv->getName() == "llvm.used")
666 return true;
667 }
668
669 if (const Instruction *instr = dyn_cast<Instruction>(U)) {
670 if (instr->getParent() && instr->getParent()->getParent()) {
671 const Function *curFunc = instr->getParent()->getParent();
672 if (oneFunc && (curFunc != oneFunc))
673 return false;
674 oneFunc = curFunc;
675 return true;
676 } else
677 return false;
678 }
679
680 for (const User *UU : U->users())
681 if (!usedInOneFunc(UU, oneFunc))
682 return false;
683
684 return true;
685}
686
687/* Find out if a global variable can be demoted to local scope.
688 * Currently, this is valid for CUDA shared variables, which have local
689 * scope and global lifetime. So the conditions to check are :
690 * 1. Is the global variable in shared address space?
691 * 2. Does it have local linkage?
692 * 3. Is the global variable referenced only in one function?
693 */
694static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
695 if (!gv->hasLocalLinkage())
696 return false;
697 PointerType *Pty = gv->getType();
698 if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
699 return false;
700
701 const Function *oneFunc = nullptr;
702
703 bool flag = usedInOneFunc(gv, oneFunc);
704 if (!flag)
705 return false;
706 if (!oneFunc)
707 return false;
708 f = oneFunc;
709 return true;
710}
711
// Returns true if C is used, directly or through a chain of Constant users,
// by an instruction whose parent function is contained in seenMap.
// Instructions without a parent block or parent function are skipped.
// NOTE(review): listing line 713 (the declaration of the seenMap parameter)
// is missing from this copy of the file.
712static bool useFuncSeen(const Constant *C,
714 for (const User *U : C->users()) {
715 if (const Constant *cu = dyn_cast<Constant>(U)) {
716 if (useFuncSeen(cu, seenMap))
717 return true;
718 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
719 const BasicBlock *bb = I->getParent();
720 if (!bb)
721 continue;
722 const Function *caller = bb->getParent();
723 if (!caller)
724 continue;
725 if (seenMap.contains(caller))
726 return true;
727 }
728 }
729 return false;
730}
731
// Writes the function declarations the module needs to O, then a declaration
// for every alias. Walking the functions in module order, a declaration is
// written for:
//   * every function carrying the "nvptx-libcall-callee" attribute;
//   * every non-intrinsic declaration that has at least one use;
//   * every definition that is used in a global variable definition, or by a
//     function that appears earlier in the module (tracked in seenMap).
// NOTE(review): listing line 733 (the declaration of seenMap) is missing from
// this copy of the file.
732void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
734 for (const Function &F : M) {
735 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
736 emitDeclaration(&F, O);
737 continue;
738 }
739
740 if (F.isDeclaration()) {
741 if (F.use_empty())
742 continue;
743 if (F.getIntrinsicID())
744 continue;
745 emitDeclaration(&F, O);
746 continue;
747 }
  // F is a definition: at most one declaration is written for it, hence the
  // `break` after each emitDeclaration below.
748 for (const User *U : F.users()) {
749 if (const Constant *C = dyn_cast<Constant>(U)) {
750 if (usedInGlobalVarDef(C)) {
751 // The use is in the initialization of a global variable
752 // that is a function pointer, so print a declaration
753 // for the original function
754 emitDeclaration(&F, O);
755 break;
756 }
757 // Emit a declaration of this function if the function that
758 // uses this constant expr has already been seen.
759 if (useFuncSeen(C, seenMap)) {
760 emitDeclaration(&F, O);
761 break;
762 }
763 }
764
765 if (!isa<Instruction>(U))
766 continue;
767 const Instruction *instr = cast<Instruction>(U);
768 const BasicBlock *bb = instr->getParent();
769 if (!bb)
770 continue;
771 const Function *caller = bb->getParent();
772 if (!caller)
773 continue;
774
775 // If a caller has already been seen, then the caller is
776 // appearing in the module before the callee. so print out
777 // a declaration for the callee.
778 if (seenMap.contains(caller)) {
779 emitDeclaration(&F, O);
780 break;
781 }
782 }
783 seenMap[&F] = true;
784 }
785 for (const GlobalAlias &GA : M.aliases())
786 emitAliasDeclaration(&GA, O);
787}
788
// NOTE(review): the signature line (listing line 789) is missing from this
// copy; the name isEmptyXXStructor is taken from the calls that pass
// llvm.global_ctors / llvm.global_dtors elsewhere in this file.
// Returns true when GV is null, when its initializer is not a ConstantArray,
// or when that array has no operands.
790 if (!GV) return true;
791 const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
792 if (!InitList) return true; // Not an array; we don't know how to parse.
793 return InitList->getNumOperands() == 0;
794}
795
796void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
797 // Construct a default subtarget off of the TargetMachine defaults. The
798 // rest of NVPTX isn't friendly to change subtargets per function and
799 // so the default TargetMachine will have all of the options.
800 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
801 const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
802 SmallString<128> Str1;
803 raw_svector_ostream OS1(Str1);
804
805 // Emit header before any dwarf directives are emitted below.
806 emitHeader(M, OS1, *STI);
807 OutStreamer->emitRawText(OS1.str());
808}
809
// NOTE(review): the signature line (listing line 810) is missing from this
// copy; judging by the base-class call below this is the body of
// NVPTXAsmPrinter::doInitialization -- confirm upstream.
// Validates the module before any output is produced:
//   * aliases require PTX >= 6.3 and sm_30, otherwise a fatal error;
//   * a non-empty llvm.global_ctors or llvm.global_dtors list is rejected
//     (return true) unless LowerCtorDtor is set or the module carries the
//     "openmp" flag.
// Then runs the base-class initialization and resets GlobalsEmitted.
// NOTE(review): listing lines 822 and 828, which open the two diagnostics
// for ctors/dtors, are missing as well.
811 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
812 const NVPTXSubtarget &STI =
813 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
814 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
815 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
816
817 // OpenMP supports NVPTX global constructors and destructors.
818 bool IsOpenMP = M.getModuleFlag("openmp") != nullptr;
819
820 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
821 !LowerCtorDtor && !IsOpenMP) {
823 "Module has a nontrivial global ctor, which NVPTX does not support.");
824 return true; // error
825 }
826 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
827 !LowerCtorDtor && !IsOpenMP) {
829 "Module has a nontrivial global dtor, which NVPTX does not support.");
830 return true; // error
831 }
832
833 // We need to call the parent's one explicitly.
834 bool Result = AsmPrinter::doInitialization(M);
835
836 GlobalsEmitted = false;
837
838 return Result;
839}
840
// Emits the module-level part of the output as one block of raw text: first
// the function declarations, then every global variable of M, ordered so
// that a global is printed after the globals it depends on.
// NOTE(review): listing lines 852-854 (the declarations of Globals,
// GVVisited and GVVisiting) are missing from this copy of the file.
841void NVPTXAsmPrinter::emitGlobals(const Module &M) {
842 SmallString<128> Str2;
843 raw_svector_ostream OS2(Str2);
844
845 emitDeclarations(M, OS2);
846
847 // As ptxas does not support forward references of globals, we need to first
848 // sort the list of module-level globals in def-use order. We visit each
849 // global variable in order, and ensure that we emit it *after* its dependent
850 // globals. We use a little extra memory maintaining both a set and a list to
851 // have fast searches while maintaining a strict ordering.
855
856 // Visit each global variable, in order
857 for (const GlobalVariable &I : M.globals())
858 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
859
860 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
861 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
862
863 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
864 const NVPTXSubtarget &STI =
865 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
866
867 // Print out module-level global variables in proper order
868 for (unsigned i = 0, e = Globals.size(); i != e; ++i)
869 printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
870
871 OS2 << '\n';
872
873 OutStreamer->emitRawText(OS2.str());
874}
875
// Emits ".alias <alias symbol>, <aliasee name>;" for GA as raw text.
// NOTE(review): listing lines 877-878 (the declarations of the buffer and of
// the stream OS) are missing from this copy of the file.
876void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
879
880 MCSymbol *Name = getSymbol(&GA);
881
882 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
883 << ";\n";
884
885 OutStreamer->emitRawText(OS.str());
886}
887
// Writes the PTX file header to O:
//   * a "Generated by LLVM NVPTX Back-End" banner;
//   * ".version <major>.<minor>", where the subtarget's PTX version number is
//     split as value / 10 and value % 10;
//   * ".target <name>", with ", texmode_independent" appended for the NVCL
//     driver interface and ", debug" appended when the module has debug info
//     and at least one compile unit with full debug info;
//   * ".address_size 64" or ".address_size 32".
// NOTE(review): the `case` labels of the switch over the emission kind
// (listing lines 908-909 and 911-912) are missing from this copy, so which
// kinds count as full debug info has to be confirmed upstream.
888void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
889 const NVPTXSubtarget &STI) {
890 O << "//\n";
891 O << "// Generated by LLVM NVPTX Back-End\n";
892 O << "//\n";
893 O << "\n";
894
895 unsigned PTXVersion = STI.getPTXVersion();
896 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
897
898 O << ".target ";
899 O << STI.getTargetName();
900
901 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
902 if (NTM.getDrvInterface() == NVPTX::NVCL)
903 O << ", texmode_independent";
904
905 bool HasFullDebugInfo = false;
906 for (DICompileUnit *CU : M.debug_compile_units()) {
907 switch(CU->getEmissionKind()) {
910 break;
913 HasFullDebugInfo = true;
914 break;
915 }
  // One compile unit with full debug info is enough; stop scanning.
916 if (HasFullDebugInfo)
917 break;
918 }
919 if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
920 O << ", debug";
921
922 O << "\n";
923
924 O << ".address_size ";
925 if (NTM.is64Bit())
926 O << "64";
927 else
928 O << "32";
929 O << "\n";
930
931 O << "\n";
932}
933
// NOTE(review): the signature line (listing line 934) is missing from this
// copy; judging by the base-class call below this is the body of
// NVPTXAsmPrinter::doFinalization -- confirm upstream. Listing line 947 is
// missing as well.
// Emits the module-level globals if no function triggered that earlier, runs
// the base-class finalization, closes the last section and emits an empty
// .debug_loc section when the module has debug info, and finally outputs the
// pending DWARF .file directives. Returns the base-class result.
935 bool HasDebugInfo = MMI && MMI->hasDebugInfo();
936
937 // If we did not emit any functions, then the global declarations have not
938 // yet been emitted.
939 if (!GlobalsEmitted) {
940 emitGlobals(M);
941 GlobalsEmitted = true;
942 }
943
944 // call doFinalization
945 bool ret = AsmPrinter::doFinalization(M);
946
948
949 auto *TS =
950 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
951 // Close the last emitted section
952 if (HasDebugInfo) {
953 TS->closeLastSection();
954 // Emit empty .debug_loc section for better support of the empty files.
955 OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
956 }
957
958 // Output last DWARF .file directives, if any.
959 TS->outputDwarfFileDirectives();
960
961 return ret;
962}
963
964// This function emits appropriate linkage directives for
965// functions and global variables.
966//
967// extern function declaration -> .extern
968// extern function definition -> .visible
969// external global variable with init -> .visible
970// external without init -> .extern
971// appending -> not allowed, assert.
972// for any linkage other than
973// internal, private, linker_private,
974// linker_private_weak, linker_private_weak_def_auto,
975// we emit -> .weak.
976
977void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
978 raw_ostream &O) {
979 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
980 if (V->hasExternalLinkage()) {
981 if (isa<GlobalVariable>(V)) {
982 const GlobalVariable *GVar = cast<GlobalVariable>(V);
983 if (GVar) {
984 if (GVar->hasInitializer())
985 O << ".visible ";
986 else
987 O << ".extern ";
988 }
989 } else if (V->isDeclaration())
990 O << ".extern ";
991 else
992 O << ".visible ";
993 } else if (V->hasAppendingLinkage()) {
994 std::string msg;
995 msg.append("Error: ");
996 msg.append("Symbol ");
997 if (V->hasName())
998 msg.append(std::string(V->getName()));
999 msg.append("has unsupported appending linkage type");
1000 llvm_unreachable(msg.c_str());
1001 } else if (!V->hasInternalLinkage() &&
1002 !V->hasPrivateLinkage()) {
1003 O << ".weak ";
1004 }
1005 }
1006}
1007
1008void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1009 raw_ostream &O, bool processDemoted,
1010 const NVPTXSubtarget &STI) {
1011 // Skip meta data
1012 if (GVar->hasSection()) {
1013 if (GVar->getSection() == "llvm.metadata")
1014 return;
1015 }
1016
1017 // Skip LLVM intrinsic global variables
1018 if (GVar->getName().starts_with("llvm.") ||
1019 GVar->getName().starts_with("nvvm."))
1020 return;
1021
1022 const DataLayout &DL = getDataLayout();
1023
1024 // GlobalVariables are always constant pointers themselves.
1025 Type *ETy = GVar->getValueType();
1026
1027 if (GVar->hasExternalLinkage()) {
1028 if (GVar->hasInitializer())
1029 O << ".visible ";
1030 else
1031 O << ".extern ";
1032 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
1034 O << ".common ";
1035 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1037 GVar->hasCommonLinkage()) {
1038 O << ".weak ";
1039 }
1040
1041 if (isTexture(*GVar)) {
1042 O << ".global .texref " << getTextureName(*GVar) << ";\n";
1043 return;
1044 }
1045
1046 if (isSurface(*GVar)) {
1047 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1048 return;
1049 }
1050
1051 if (GVar->isDeclaration()) {
1052 // (extern) declarations, no definition or initializer
1053 // Currently the only known declaration is for an automatic __local
1054 // (.shared) promoted to global.
1055 emitPTXGlobalVariable(GVar, O, STI);
1056 O << ";\n";
1057 return;
1058 }
1059
1060 if (isSampler(*GVar)) {
1061 O << ".global .samplerref " << getSamplerName(*GVar);
1062
1063 const Constant *Initializer = nullptr;
1064 if (GVar->hasInitializer())
1065 Initializer = GVar->getInitializer();
1066 const ConstantInt *CI = nullptr;
1067 if (Initializer)
1068 CI = dyn_cast<ConstantInt>(Initializer);
1069 if (CI) {
1070 unsigned sample = CI->getZExtValue();
1071
1072 O << " = { ";
1073
1074 for (int i = 0,
1075 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1076 i < 3; i++) {
1077 O << "addr_mode_" << i << " = ";
1078 switch (addr) {
1079 case 0:
1080 O << "wrap";
1081 break;
1082 case 1:
1083 O << "clamp_to_border";
1084 break;
1085 case 2:
1086 O << "clamp_to_edge";
1087 break;
1088 case 3:
1089 O << "wrap";
1090 break;
1091 case 4:
1092 O << "mirror";
1093 break;
1094 }
1095 O << ", ";
1096 }
1097 O << "filter_mode = ";
1098 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1099 case 0:
1100 O << "nearest";
1101 break;
1102 case 1:
1103 O << "linear";
1104 break;
1105 case 2:
1106 llvm_unreachable("Anisotropic filtering is not supported");
1107 default:
1108 O << "nearest";
1109 break;
1110 }
1111 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1112 O << ", force_unnormalized_coords = 1";
1113 }
1114 O << " }";
1115 }
1116
1117 O << ";\n";
1118 return;
1119 }
1120
1121 if (GVar->hasPrivateLinkage()) {
1122 if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1123 return;
1124
1125 // FIXME - need better way (e.g. Metadata) to avoid generating this global
1126 if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1127 return;
1128 if (GVar->use_empty())
1129 return;
1130 }
1131
1132 const Function *demotedFunc = nullptr;
1133 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1134 O << "// " << GVar->getName() << " has been demoted\n";
1135 if (localDecls.find(demotedFunc) != localDecls.end())
1136 localDecls[demotedFunc].push_back(GVar);
1137 else {
1138 std::vector<const GlobalVariable *> temp;
1139 temp.push_back(GVar);
1140 localDecls[demotedFunc] = temp;
1141 }
1142 return;
1143 }
1144
1145 O << ".";
1146 emitPTXAddressSpace(GVar->getAddressSpace(), O);
1147
1148 if (isManaged(*GVar)) {
1149 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1151 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1152 }
1153 O << " .attribute(.managed)";
1154 }
1155
1156 if (MaybeAlign A = GVar->getAlign())
1157 O << " .align " << A->value();
1158 else
1159 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1160
1161 if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1162 (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1163 O << " .";
1164 // Special case: ABI requires that we use .u8 for predicates
1165 if (ETy->isIntegerTy(1))
1166 O << "u8";
1167 else
1168 O << getPTXFundamentalTypeStr(ETy, false);
1169 O << " ";
1170 getSymbol(GVar)->print(O, MAI);
1171
1172 // PTX allows variable initialization only for constant and global state
1173 // spaces.
1174 if (GVar->hasInitializer()) {
1175 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1176 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1177 const Constant *Initializer = GVar->getInitializer();
1178 // 'undef' is treated as there is no value specified.
1179 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1180 O << " = ";
1181 printScalarConstant(Initializer, O);
1182 }
1183 } else {
1184 // The frontend adds zero-initializer to device and constant variables
1185 // that don't have an initial value, and UndefValue to shared
1186 // variables, so skip warning for this case.
1187 if (!GVar->getInitializer()->isNullValue() &&
1188 !isa<UndefValue>(GVar->getInitializer())) {
1189 report_fatal_error("initial value of '" + GVar->getName() +
1190 "' is not allowed in addrspace(" +
1191 Twine(GVar->getAddressSpace()) + ")");
1192 }
1193 }
1194 }
1195 } else {
1196 uint64_t ElementSize = 0;
1197
1198 // Although PTX has direct support for struct type and array type and
1199 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1200 // targets that support these high level field accesses. Structs, arrays
1201 // and vectors are lowered into arrays of bytes.
1202 switch (ETy->getTypeID()) {
1203 case Type::IntegerTyID: // Integers larger than 64 bits
1204 case Type::StructTyID:
1205 case Type::ArrayTyID:
1207 ElementSize = DL.getTypeStoreSize(ETy);
1208 // PTX allows variable initialization only for constant and
1209 // global state spaces.
1210 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1211 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1212 GVar->hasInitializer()) {
1213 const Constant *Initializer = GVar->getInitializer();
1214 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1215 AggBuffer aggBuffer(ElementSize, *this);
1216 bufferAggregateConstant(Initializer, &aggBuffer);
1217 if (aggBuffer.numSymbols()) {
1218 unsigned int ptrSize = MAI->getCodePointerSize();
1219 if (ElementSize % ptrSize ||
1220 !aggBuffer.allSymbolsAligned(ptrSize)) {
1221 // Print in bytes and use the mask() operator for pointers.
1222 if (!STI.hasMaskOperator())
1224 "initialized packed aggregate with pointers '" +
1225 GVar->getName() +
1226 "' requires at least PTX ISA version 7.1");
1227 O << " .u8 ";
1228 getSymbol(GVar)->print(O, MAI);
1229 O << "[" << ElementSize << "] = {";
1230 aggBuffer.printBytes(O);
1231 O << "}";
1232 } else {
1233 O << " .u" << ptrSize * 8 << " ";
1234 getSymbol(GVar)->print(O, MAI);
1235 O << "[" << ElementSize / ptrSize << "] = {";
1236 aggBuffer.printWords(O);
1237 O << "}";
1238 }
1239 } else {
1240 O << " .b8 ";
1241 getSymbol(GVar)->print(O, MAI);
1242 O << "[" << ElementSize << "] = {";
1243 aggBuffer.printBytes(O);
1244 O << "}";
1245 }
1246 } else {
1247 O << " .b8 ";
1248 getSymbol(GVar)->print(O, MAI);
1249 if (ElementSize) {
1250 O << "[";
1251 O << ElementSize;
1252 O << "]";
1253 }
1254 }
1255 } else {
1256 O << " .b8 ";
1257 getSymbol(GVar)->print(O, MAI);
1258 if (ElementSize) {
1259 O << "[";
1260 O << ElementSize;
1261 O << "]";
1262 }
1263 }
1264 break;
1265 default:
1266 llvm_unreachable("type not supported yet");
1267 }
1268 }
1269 O << ";\n";
1270}
1271
1272void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1273 const Value *v = Symbols[nSym];
1274 const Value *v0 = SymbolsBeforeStripping[nSym];
1275 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1276 MCSymbol *Name = AP.getSymbol(GVar);
1277 PointerType *PTy = dyn_cast<PointerType>(v0->getType());
1278 // Is v0 a generic pointer?
1279 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1280 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1281 os << "generic(";
1282 Name->print(os, AP.MAI);
1283 os << ")";
1284 } else {
1285 Name->print(os, AP.MAI);
1286 }
1287 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1288 const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
1289 AP.printMCExpr(*Expr, os);
1290 } else
1291 llvm_unreachable("symbol type unknown");
1292}
1293
1294void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1295 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1296 // Do not emit trailing zero initializers. They will be zero-initialized by
1297 // ptxas. This saves on both space requirements for the generated PTX and on
1298 // memory use by ptxas. (See:
1299 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1300 unsigned int InitializerCount = size;
1301 // TODO: symbols make this harder, but it would still be good to trim trailing
1302 // 0s for aggs with symbols as well.
1303 if (numSymbols() == 0)
1304 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1305 InitializerCount--;
1306
1307 symbolPosInBuffer.push_back(InitializerCount);
1308 unsigned int nSym = 0;
1309 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1310 for (unsigned int pos = 0; pos < InitializerCount;) {
1311 if (pos)
1312 os << ", ";
1313 if (pos != nextSymbolPos) {
1314 os << (unsigned int)buffer[pos];
1315 ++pos;
1316 continue;
1317 }
1318 // Generate a per-byte mask() operator for the symbol, which looks like:
1319 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1320 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1321 std::string symText;
1322 llvm::raw_string_ostream oss(symText);
1323 printSymbol(nSym, oss);
1324 for (unsigned i = 0; i < ptrSize; ++i) {
1325 if (i)
1326 os << ", ";
1327 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1328 os << "(" << symText << ")";
1329 }
1330 pos += ptrSize;
1331 nextSymbolPos = symbolPosInBuffer[++nSym];
1332 assert(nextSymbolPos >= pos);
1333 }
1334}
1335
1336void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1337 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1338 symbolPosInBuffer.push_back(size);
1339 unsigned int nSym = 0;
1340 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1341 assert(nextSymbolPos % ptrSize == 0);
1342 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1343 if (pos)
1344 os << ", ";
1345 if (pos == nextSymbolPos) {
1346 printSymbol(nSym, os);
1347 nextSymbolPos = symbolPosInBuffer[++nSym];
1348 assert(nextSymbolPos % ptrSize == 0);
1349 assert(nextSymbolPos >= pos + ptrSize);
1350 } else if (ptrSize == 4)
1351 os << support::endian::read32le(&buffer[pos]);
1352 else
1353 os << support::endian::read64le(&buffer[pos]);
1354 }
1355}
1356
1357void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1358 if (localDecls.find(f) == localDecls.end())
1359 return;
1360
1361 std::vector<const GlobalVariable *> &gvars = localDecls[f];
1362
1363 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1364 const NVPTXSubtarget &STI =
1365 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1366
1367 for (const GlobalVariable *GV : gvars) {
1368 O << "\t// demoted variable\n\t";
1369 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1370 }
1371}
1372
// Writes the PTX state-space keyword for AddressSpace to O: one of "local",
// "global", "const" or "shared". Any other value reaches the default branch,
// which calls report_fatal_error.
// NOTE(review): the `case` labels that select each keyword, and the end of the
// report_fatal_error call, are not present in this listing (the embedded
// numbering skips 1376, 1379, 1382, 1385 and 1390) — confirm the exact
// address-space-to-keyword mapping against the upstream file.
1373void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1374 raw_ostream &O) const {
1375 switch (AddressSpace) {
1377 O << "local";
1378 break;
1380 O << "global";
1381 break;
1383 O << "const";
1384 break;
1386 O << "shared";
1387 break;
1388 default:
1389 report_fatal_error("Bad address space found while emitting PTX: " +
1391 break;
1392 }
1393}
1394
1395std::string
1396NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1397 switch (Ty->getTypeID()) {
1398 case Type::IntegerTyID: {
1399 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1400 if (NumBits == 1)
1401 return "pred";
1402 else if (NumBits <= 64) {
1403 std::string name = "u";
1404 return name + utostr(NumBits);
1405 } else {
1406 llvm_unreachable("Integer too large");
1407 break;
1408 }
1409 break;
1410 }
1411 case Type::BFloatTyID:
1412 case Type::HalfTyID:
1413 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1414 // PTX assembly.
1415 return "b16";
1416 case Type::FloatTyID:
1417 return "f32";
1418 case Type::DoubleTyID:
1419 return "f64";
1420 case Type::PointerTyID: {
1421 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1422 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1423
1424 if (PtrSize == 64)
1425 if (useB4PTR)
1426 return "b64";
1427 else
1428 return "u64";
1429 else if (useB4PTR)
1430 return "b32";
1431 else
1432 return "u32";
1433 }
1434 default:
1435 break;
1436 }
1437 llvm_unreachable("unexpected type");
1438}
1439
// Writes the declaration of GVar to O, with no initializer and no trailing
// ';'. The output is: state space, optional " .attribute(.managed)",
// " .align <n>", then the PTX type and the symbol name. Scalars use their
// fundamental type; i128, structs and arrays are declared as .b8 byte arrays.
1440void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1441 raw_ostream &O,
1442 const NVPTXSubtarget &STI) {
1443 const DataLayout &DL = getDataLayout();
1444
1445 // GlobalVariables are always constant pointers themselves.
1446 Type *ETy = GVar->getValueType();
1447
1448 O << ".";
1449 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1450 if (isManaged(*GVar)) {
1451 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
// NOTE(review): the call that takes the message below (embedded line 1452) is
// missing from this listing — presumably an error report; confirm upstream.
1453 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1454 }
1455 O << " .attribute(.managed)";
1456 }
// Use the explicit alignment when the variable has one, otherwise the
// DataLayout's preferred alignment for the value type.
1457 if (MaybeAlign A = GVar->getAlign())
1458 O << " .align " << A->value();
1459 else
1460 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1461
1462 // Special case for i128
1463 if (ETy->isIntegerTy(128)) {
1464 O << " .b8 ";
1465 getSymbol(GVar)->print(O, MAI);
1466 O << "[16]";
1467 return;
1468 }
1469
1470 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1471 O << " .";
1472 O << getPTXFundamentalTypeStr(ETy);
1473 O << " ";
1474 getSymbol(GVar)->print(O, MAI);
1475 return;
1476 }
1477
1478 int64_t ElementSize = 0;
1479
1480 // Although PTX has direct support for struct type and array type and LLVM IR
1481 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1482 // support these high level field accesses. Structs and arrays are lowered
1483 // into arrays of bytes.
1484 switch (ETy->getTypeID()) {
1485 case Type::StructTyID:
1486 case Type::ArrayTyID:
// NOTE(review): embedded line 1487 is missing from this listing; it may hold
// one more case label — confirm upstream.
1488 ElementSize = DL.getTypeStoreSize(ETy);
1489 O << " .b8 ";
1490 getSymbol(GVar)->print(O, MAI);
// A zero store size is printed as an empty "[]" rather than "[0]".
1491 O << "[";
1492 if (ElementSize) {
1493 O << ElementSize;
1494 }
1495 O << "]";
1496 break;
1497 default:
1498 llvm_unreachable("type not supported yet");
1499 }
1500}
1501
// Writes the parenthesised PTX parameter list of F to O.
//
// A function with no arguments that is not variadic prints "()". Otherwise the
// list opens with "(\n", parameters are separated by ",\n", and it closes with
// "\n)". Each parameter is printed in one of these forms:
//   - kernel image/sampler arguments: .surfref / .texref / .samplerref
//   - non-byval values passed as arrays: .param .align <a> .b8 <name>[<size>]
//   - non-byval scalars: .param (or .reg when not ABI) with a sized type
//   - byval arguments: a .b8 array when ABI or kernel, otherwise one .reg per
//     constituent scalar
// A variadic function gets one extra trailing unsized .b8 array parameter.
1502void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1503 const DataLayout &DL = getDataLayout();
1504 const AttributeList &PAL = F->getAttributes();
1505 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1506 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1507
// NOTE(review): embedded line 1508 is missing from this listing; the loop
// below uses `I` and `E`, which are presumably declared there — confirm
// upstream.
1509 unsigned paramIndex = 0;
1510 bool first = true;
1511 bool isKernelFunc = isKernelFunction(*F);
1512 bool isABI = (STI.getSmVersion() >= 20);
1513 bool hasImageHandles = STI.hasImageHandles();
1514
1515 if (F->arg_empty() && !F->isVarArg()) {
1516 O << "()";
1517 return;
1518 }
1519
1520 O << "(\n";
1521
1522 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1523 Type *Ty = I->getType();
1524
1525 if (!first)
1526 O << ",\n";
1527
1528 first = false;
1529
1530 // Handle image/sampler parameters
1531 if (isKernelFunction(*F)) {
1532 if (isSampler(*I) || isImage(*I)) {
1533 if (isImage(*I)) {
1534 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1535 if (hasImageHandles)
1536 O << "\t.param .u64 .ptr .surfref ";
1537 else
1538 O << "\t.param .surfref ";
1539 O << TLI->getParamName(F, paramIndex);
1540 }
1541 else { // Default image is read_only
1542 if (hasImageHandles)
1543 O << "\t.param .u64 .ptr .texref ";
1544 else
1545 O << "\t.param .texref ";
1546 O << TLI->getParamName(F, paramIndex);
1547 }
1548 } else {
1549 if (hasImageHandles)
1550 O << "\t.param .u64 .ptr .samplerref ";
1551 else
1552 O << "\t.param .samplerref ";
1553 O << TLI->getParamName(F, paramIndex);
1554 }
1555 continue;
1556 }
1557 }
1558
// Alignment for an array-style parameter: an alignment reported by getAlign()
// for this argument index wins; otherwise take the larger of the
// target-lowering alignment for the type and the parameter's alignment
// attribute (treated as 1 when absent).
1559 auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
1560 paramIndex](Type *Ty) -> Align {
1561 if (MaybeAlign StackAlign =
1562 getAlign(*F, paramIndex + AttributeList::FirstArgIndex))
1563 return StackAlign.value();
1564
1565 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1566 MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
1567 return std::max(TypeAlign, ParamAlign.valueOrOne());
1568 };
1569
1570 if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1571 if (ShouldPassAsArray(Ty)) {
1572 // Just print .param .align <a> .b8 .param[size];
1573 // <a> = optimal alignment for the element type; always multiple of
1574 // PAL.getParamAlignment
1575 // size = typeallocsize of element type
1576 Align OptimalAlign = getOptimalAlignForParam(Ty);
1577
1578 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1579 O << TLI->getParamName(F, paramIndex);
1580 O << "[" << DL.getTypeAllocSize(Ty) << "]";
1581
1582 continue;
1583 }
1584 // Just a scalar
1585 auto *PTy = dyn_cast<PointerType>(Ty);
1586 unsigned PTySizeInBits = 0;
1587 if (PTy) {
1588 PTySizeInBits =
1589 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1590 assert(PTySizeInBits && "Invalid pointer size");
1591 }
1592
1593 if (isKernelFunc) {
1594 if (PTy) {
1595 // Special handling for pointer arguments to kernel
1596 O << "\t.param .u" << PTySizeInBits << " ";
1597
// The .ptr state-space and .align annotations are only printed when the
// driver interface is not NVPTX::CUDA.
1598 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1599 NVPTX::CUDA) {
1600 int addrSpace = PTy->getAddressSpace();
1601 switch (addrSpace) {
1602 default:
1603 O << ".ptr ";
1604 break;
// NOTE(review): the case labels for the three branches below (embedded lines
// 1605, 1608, 1611) are missing from this listing — confirm which address
// space selects each one.
1606 O << ".ptr .const ";
1607 break;
1609 O << ".ptr .shared ";
1610 break;
1612 O << ".ptr .global ";
1613 break;
1614 }
1615 Align ParamAlign = I->getParamAlign().valueOrOne();
1616 O << ".align " << ParamAlign.value() << " ";
1617 }
1618 O << TLI->getParamName(F, paramIndex);
1619 continue;
1620 }
1621
1622 // non-pointer scalar to kernel func
1623 O << "\t.param .";
1624 // Special case: predicate operands become .u8 types
1625 if (Ty->isIntegerTy(1))
1626 O << "u8";
1627 else
1628 O << getPTXFundamentalTypeStr(Ty);
1629 O << " ";
1630 O << TLI->getParamName(F, paramIndex);
1631 continue;
1632 }
1633 // Non-kernel function, just print .param .b<size> for ABI
1634 // and .reg .b<size> for non-ABI
1635 unsigned sz = 0;
1636 if (isa<IntegerType>(Ty)) {
1637 sz = cast<IntegerType>(Ty)->getBitWidth();
// NOTE(review): embedded line 1638 is missing from this listing; the integer
// width may be adjusted there — confirm upstream.
1639 } else if (PTy) {
1640 assert(PTySizeInBits && "Invalid pointer size");
1641 sz = PTySizeInBits;
1642 } else
1643 sz = Ty->getPrimitiveSizeInBits();
1644 if (isABI)
1645 O << "\t.param .b" << sz << " ";
1646 else
1647 O << "\t.reg .b" << sz << " ";
1648 O << TLI->getParamName(F, paramIndex);
1649 continue;
1650 }
1651
1652 // param has byVal attribute.
1653 Type *ETy = PAL.getParamByValType(paramIndex);
1654 assert(ETy && "Param should have byval type");
1655
1656 if (isABI || isKernelFunc) {
1657 // Just print .param .align <a> .b8 .param[size];
1658 // <a> = optimal alignment for the element type; always multiple of
1659 // PAL.getParamAlignment
1660 // size = typeallocsize of element type
1661 Align OptimalAlign =
1662 isKernelFunc
1663 ? getOptimalAlignForParam(ETy)
1664 : TLI->getFunctionByValParamAlign(
1665 F, ETy, PAL.getParamAlignment(paramIndex).valueOrOne(), DL);
1666
1667 unsigned sz = DL.getTypeAllocSize(ETy);
1668 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1669 O << TLI->getParamName(F, paramIndex);
1670 O << "[" << sz << "]";
1671 continue;
1672 } else {
1673 // Split the ETy into constituent parts and
1674 // print .param .b<size> <name> for each part.
1675 // Further, if a part is vector, print the above for
1676 // each vector element.
1677 SmallVector<EVT, 16> vtparts;
1678 ComputeValueVTs(*TLI, DL, ETy, vtparts);
1679 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1680 unsigned elems = 1;
1681 EVT elemtype = vtparts[i];
1682 if (vtparts[i].isVector()) {
1683 elems = vtparts[i].getVectorNumElements();
1684 elemtype = vtparts[i].getVectorElementType();
1685 }
1686
1687 for (unsigned j = 0, je = elems; j != je; ++j) {
1688 unsigned sz = elemtype.getSizeInBits();
1689 if (elemtype.isInteger())
// NOTE(review): embedded line 1690 is missing from this listing, so the `if`
// above appears to guard the print below; presumably the missing line is its
// real body — confirm upstream.
1691 O << "\t.reg .b" << sz << " ";
1692 O << TLI->getParamName(F, paramIndex);
1693 if (j < je - 1)
1694 O << ",\n";
// Every emitted scalar consumes its own parameter index.
1695 ++paramIndex;
1696 }
1697 if (i < e - 1)
1698 O << ",\n";
1699 }
// Undo the last increment; the enclosing for loop advances paramIndex itself.
1700 --paramIndex;
1701 continue;
1702 }
1703 }
1704
1705 if (F->isVarArg()) {
1706 if (!first)
1707 O << ",\n";
1708 O << "\t.param .align " << STI.getMaxRequiredAlignment();
1709 O << " .b8 ";
1710 O << TLI->getParamName(F, /* vararg */ -1) << "[]";
1711 }
1712
1713 O << "\n)";
1714}
1715
// Builds the per-register-class numbering of MF's virtual registers in
// VRegMapping and emits, as raw text, the stack depot declaration (when the
// frame has a non-zero size) followed by one ".reg" declaration per register
// class that has at least one mapped register.
1716void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1717 const MachineFunction &MF) {
1718 SmallString<128> Str;
// NOTE(review): embedded line 1719 is missing from this listing; the stream
// `O` used below is presumably declared there over Str — confirm upstream.
1720
1721 // Map the global virtual register number to a register class specific
1722 // virtual register number starting from 1 with that class.
// NOTE(review): embedded line 1723 is missing from this listing; `TRI` used
// below is presumably declared there — confirm upstream.
1724 //unsigned numRegClasses = TRI->getNumRegClasses();
1725
1726 // Emit the Fake Stack Object
1727 const MachineFrameInfo &MFI = MF.getFrameInfo();
1728 int64_t NumBytes = MFI.getStackSize();
1729 if (NumBytes) {
1730 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1731 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
// %SP and %SPL are declared as 64-bit or 32-bit registers to match the target.
1732 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1733 O << "\t.reg .b64 \t%SP;\n";
1734 O << "\t.reg .b64 \t%SPL;\n";
1735 } else {
1736 O << "\t.reg .b32 \t%SP;\n";
1737 O << "\t.reg .b32 \t%SPL;\n";
1738 }
1739 }
1740
1741 // Go through all virtual registers to establish the mapping between the
1742 // global virtual
1743 // register number and the per class virtual register number.
1744 // We use the per class virtual register number in the ptx output.
1745 unsigned int numVRs = MRI->getNumVirtRegs();
1746 for (unsigned i = 0; i < numVRs; i++) {
// NOTE(review): embedded line 1747 is missing from this listing; `vr` is
// presumably derived from the index `i` there — confirm upstream.
1748 const TargetRegisterClass *RC = MRI->getRegClass(vr);
1749 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
// Per-class numbers start at 1: the new register gets (current count + 1).
1750 int n = regmap.size();
1751 regmap.insert(std::make_pair(vr, n + 1));
1752 }
1753
1754 // Emit register declarations
1755 // @TODO: Extract out the real register usage
1756 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1757 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1758 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1759 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1760 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1761 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1762 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1763
1764 // Emit declaration of the virtual registers or 'physical' registers for
1765 // each register class
1766 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1767 const TargetRegisterClass *RC = TRI->getRegClass(i);
1768 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1769 std::string rcname = getNVPTXRegClassName(RC);
1770 std::string rcStr = getNVPTXRegClassStr(RC);
1771 int n = regmap.size();
1772
1773 // Only declare those registers that may be used.
1774 if (n) {
// The declared count is n+1 because the numbering above starts at 1.
1775 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1776 << ">;\n";
1777 }
1778 }
1779
1780 OutStreamer->emitRawText(O.str());
1781}
1782
// Prints a floating-point constant to O as a prefix followed by the value's
// bit pattern in upper-case hex: "0f" plus 8 digits for float, "0d" plus 16
// digits for double. Any other floating-point type is treated as unreachable.
1783void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1784 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1785 bool ignored;
1786 unsigned int numHex;
1787 const char *lead;
1788
1789 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1790 numHex = 8;
1791 lead = "0f";
// NOTE(review): embedded line 1792 is missing from this listing; the copy
// `APF` and the flag `ignored` are otherwise unused in this branch, so
// presumably a conversion happens there — confirm upstream.
1793 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1794 numHex = 16;
1795 lead = "0d";
// NOTE(review): embedded line 1796 is missing from this listing, as in the
// float branch — confirm upstream.
1797 } else
1798 llvm_unreachable("unsupported fp type");
1799
1800 APInt API = APF.bitcastToAPInt();
1801 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1802}
1803
1804void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1805 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1806 O << CI->getValue();
1807 return;
1808 }
1809 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1810 printFPConstant(CFP, O);
1811 return;
1812 }
1813 if (isa<ConstantPointerNull>(CPV)) {
1814 O << "0";
1815 return;
1816 }
1817 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1818 bool IsNonGenericPointer = false;
1819 if (GVar->getType()->getAddressSpace() != 0) {
1820 IsNonGenericPointer = true;
1821 }
1822 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1823 O << "generic(";
1824 getSymbol(GVar)->print(O, MAI);
1825 O << ")";
1826 } else {
1827 getSymbol(GVar)->print(O, MAI);
1828 }
1829 return;
1830 }
1831 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1832 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1833 printMCExpr(*E, O);
1834 return;
1835 }
1836 llvm_unreachable("Not scalar type found in printScalarConstant()");
1837}
1838
// Appends the byte representation of the constant CPV to AggBuffer, least
// significant byte first.
//
// Bytes is forwarded to AggBuffer::addBytes / addZeros as the number of bytes
// the value should occupy; for undef/null values, 0 means "use the alloc size
// of CPV's type". Pointers and ptrtoint expressions are recorded as symbols
// followed by AllocSize zero bytes; aggregates are handed to
// bufferAggregateConstant().
1839void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1840 AggBuffer *AggBuffer) {
1841 const DataLayout &DL = getDataLayout();
1842 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1843 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1844 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1845 // only the space allocated by CPV.
1846 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1847 return;
1848 }
1849
1850 // Helper for filling AggBuffer with APInts.
1851 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1852 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1853 SmallVector<unsigned char, 16> Buf(NumBytes);
// All bytes except the last are full 8-bit extractions.
1854 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1855 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1856 }
// The last byte may be partial when the bit width is not a multiple of 8, so
// only the remaining bits are extracted.
1857 size_t LastBytePosition = (NumBytes - 1) * 8;
1858 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1859 Buf[NumBytes - 1] =
1860 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1861 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1862 };
1863
1864 switch (CPV->getType()->getTypeID()) {
1865 case Type::IntegerTyID:
1866 if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1867 AddIntToBuffer(CI->getValue());
1868 break;
1869 }
1870 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
// First try to fold the expression down to a plain integer.
1871 if (const auto *CI =
1872 dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1873 AddIntToBuffer(CI->getValue());
1874 break;
1875 }
// An unfoldable ptrtoint is recorded as a symbol followed by AllocSize zero
// bytes.
1876 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1877 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1878 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1879 AggBuffer->addZeros(AllocSize);
1880 break;
1881 }
1882 }
1883 llvm_unreachable("unsupported integer const type");
1884 break;
1885
1886 case Type::HalfTyID:
1887 case Type::BFloatTyID:
1888 case Type::FloatTyID:
1889 case Type::DoubleTyID:
1890 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1891 break;
1892
1893 case Type::PointerTyID: {
// Record the symbol (both the stripped and the original value) when the
// pointer is a global or a constant expression, then add AllocSize zero bytes
// in every case.
1894 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1895 AggBuffer->addSymbol(GVar, GVar);
1896 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1897 const Value *v = Cexpr->stripPointerCasts();
1898 AggBuffer->addSymbol(v, Cexpr);
1899 }
1900 AggBuffer->addZeros(AllocSize);
1901 break;
1902 }
1903
1904 case Type::ArrayTyID:
// NOTE(review): embedded line 1905 is missing from this listing; it may hold
// one more case label — confirm upstream.
1906 case Type::StructTyID: {
1907 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1908 bufferAggregateConstant(CPV, AggBuffer);
// Add zeros when the requested size exceeds the aggregate's alloc size.
1909 if (Bytes > AllocSize)
1910 AggBuffer->addZeros(Bytes - AllocSize);
1911 } else if (isa<ConstantAggregateZero>(CPV))
1912 AggBuffer->addZeros(Bytes);
1913 else
1914 llvm_unreachable("Unexpected Constant type");
1915 break;
1916 }
1917
1918 default:
1919 llvm_unreachable("unsupported type");
1920 }
1921}
1922
1923void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1924 AggBuffer *aggBuffer) {
1925 const DataLayout &DL = getDataLayout();
1926 int Bytes;
1927
1928 // Integers of arbitrary width
1929 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1930 APInt Val = CI->getValue();
1931 for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1932 uint8_t Byte = Val.getLoBits(8).getZExtValue();
1933 aggBuffer->addBytes(&Byte, 1, 1);
1934 Val.lshrInPlace(8);
1935 }
1936 return;
1937 }
1938
1939 // Old constants
1940 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1941 if (CPV->getNumOperands())
1942 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1943 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1944 return;
1945 }
1946
1947 if (const ConstantDataSequential *CDS =
1948 dyn_cast<ConstantDataSequential>(CPV)) {
1949 if (CDS->getNumElements())
1950 for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1951 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1952 aggBuffer);
1953 return;
1954 }
1955
1956 if (isa<ConstantStruct>(CPV)) {
1957 if (CPV->getNumOperands()) {
1958 StructType *ST = cast<StructType>(CPV->getType());
1959 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1960 if (i == (e - 1))
1961 Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1962 DL.getTypeAllocSize(ST) -
1963 DL.getStructLayout(ST)->getElementOffset(i);
1964 else
1965 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1966 DL.getStructLayout(ST)->getElementOffset(i);
1967 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1968 }
1969 }
1970 return;
1971 }
1972 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1973}
1974
1975/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1976/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1977/// expressions that are representable in PTX and create
1978/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
///
/// ProcessingGeneric is set once an addrspacecast to address space 0 has been
/// stripped; a global value reached in that state is wrapped in an
/// NVPTXGenericMCSymbolRefExpr. Expressions that cannot be lowered end in
/// report_fatal_error.
1979const MCExpr *
1980NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1981 MCContext &Ctx = OutContext;
1982
// Null and undef values lower to the constant 0.
1983 if (CV->isNullValue() || isa<UndefValue>(CV))
1984 return MCConstantExpr::create(0, Ctx);
1985
1986 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1987 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1988
1989 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1990 const MCSymbolRefExpr *Expr =
// NOTE(review): the initializer of `Expr` (embedded line 1991) is missing from
// this listing — presumably a symbol reference to GV; confirm upstream.
1992 if (ProcessingGeneric) {
1993 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1994 } else {
1995 return Expr;
1996 }
1997 }
1998
1999 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
2000 if (!CE) {
2001 llvm_unreachable("Unknown constant value to lower!");
2002 }
2003
// Cases that `break` fall through to the constant-folding retry and the error
// report at the end of the function.
2004 switch (CE->getOpcode()) {
2005 default:
2006 break; // Error
2007
2008 case Instruction::AddrSpaceCast: {
2009 // Strip the addrspacecast and pass along the operand
2010 PointerType *DstTy = cast<PointerType>(CE->getType());
2011 if (DstTy->getAddressSpace() == 0)
2012 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2013
2014 break; // Error
2015 }
2016
2017 case Instruction::GetElementPtr: {
2018 const DataLayout &DL = getDataLayout();
2019
2020 // Generate a symbolic expression for the byte address
2021 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2022 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2023
2024 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2025 ProcessingGeneric);
// A zero offset needs no arithmetic: return the base expression as is.
2026 if (!OffsetAI)
2027 return Base;
2028
2029 int64_t Offset = OffsetAI.getSExtValue();
// NOTE(review): the start of the return statement that combines Base and
// Offset (embedded line 2030) is missing from this listing — confirm upstream.
2031 Ctx);
2032 }
2033
2034 case Instruction::Trunc:
2035 // We emit the value and depend on the assembler to truncate the generated
2036 // expression properly. This is important for differences between
2037 // blockaddress labels. Since the two labels are in the same function, it
2038 // is reasonable to treat their delta as a 32-bit value.
2039 [[fallthrough]];
2040 case Instruction::BitCast:
2041 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2042
2043 case Instruction::IntToPtr: {
2044 const DataLayout &DL = getDataLayout();
2045
2046 // Handle casts to pointers by changing them into casts to the appropriate
2047 // integer type. This promotes constant folding and simplifies this code.
2048 Constant *Op = CE->getOperand(0);
2049 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2050 /*IsSigned*/ false, DL);
2051 if (Op)
2052 return lowerConstantForGV(Op, ProcessingGeneric);
2053
2054 break; // Error
2055 }
2056
2057 case Instruction::PtrToInt: {
2058 const DataLayout &DL = getDataLayout();
2059
2060 // Support only foldable casts to/from pointers that can be eliminated by
2061 // changing the pointer to the appropriately sized integer type.
2062 Constant *Op = CE->getOperand(0);
2063 Type *Ty = CE->getType();
2064
2065 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2066
2067 // We can emit the pointer value into this slot if the slot is an
2068 // integer slot equal to the size of the pointer.
2069 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2070 return OpExpr;
2071
2072 // Otherwise the pointer is smaller than the resultant integer, mask off
2073 // the high bits so we are sure to get a proper truncation if the input is
2074 // a constant expr.
2075 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2076 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2077 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2078 }
2079
2080 // The MC library also has a right-shift operator, but it isn't consistently
2081 // signed or unsigned between different targets.
2082 case Instruction::Add: {
2083 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2084 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
// Add is the only binary operator handled; the inner switch keeps the shape of
// the code this function was copied from.
2085 switch (CE->getOpcode()) {
2086 default: llvm_unreachable("Unknown binary operator constant cast expr");
2087 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2088 }
2089 }
2090 }
2091
2092 // If the code isn't optimized, there may be outstanding folding
2093 // opportunities. Attempt to fold the expression using DataLayout as a
2094 // last resort before giving up.
// NOTE(review): the definition of `C` (embedded line 2095) is missing from
// this listing — presumably the folded form of CE; confirm upstream.
2096 if (C != CE)
2097 return lowerConstantForGV(C, ProcessingGeneric);
2098
2099 // Otherwise report the problem to the user.
2100 std::string S;
// NOTE(review): the declaration of the stream `OS` (embedded line 2101) is
// missing from this listing — presumably a string stream over S; confirm
// upstream.
2102 OS << "Unsupported expression in static initializer: ";
2103 CE->printAsOperand(OS, /*PrintType=*/false,
2104 !MF ? nullptr : MF->getFunction().getParent());
2105 report_fatal_error(Twine(OS.str()));
2106}
2107
2108// Copy of MCExpr::print customized for NVPTX
2109void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2110 switch (Expr.getKind()) {
2111 case MCExpr::Target:
2112 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2113 case MCExpr::Constant:
2114 OS << cast<MCConstantExpr>(Expr).getValue();
2115 return;
2116
2117 case MCExpr::SymbolRef: {
2118 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2119 const MCSymbol &Sym = SRE.getSymbol();
2120 Sym.print(OS, MAI);
2121 return;
2122 }
2123
2124 case MCExpr::Unary: {
2125 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2126 switch (UE.getOpcode()) {
2127 case MCUnaryExpr::LNot: OS << '!'; break;
2128 case MCUnaryExpr::Minus: OS << '-'; break;
2129 case MCUnaryExpr::Not: OS << '~'; break;
2130 case MCUnaryExpr::Plus: OS << '+'; break;
2131 }
2132 printMCExpr(*UE.getSubExpr(), OS);
2133 return;
2134 }
2135
2136 case MCExpr::Binary: {
2137 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2138
2139 // Only print parens around the LHS if it is non-trivial.
2140 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2141 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2142 printMCExpr(*BE.getLHS(), OS);
2143 } else {
2144 OS << '(';
2145 printMCExpr(*BE.getLHS(), OS);
2146 OS<< ')';
2147 }
2148
2149 switch (BE.getOpcode()) {
2150 case MCBinaryExpr::Add:
2151 // Print "X-42" instead of "X+-42".
2152 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2153 if (RHSC->getValue() < 0) {
2154 OS << RHSC->getValue();
2155 return;
2156 }
2157 }
2158
2159 OS << '+';
2160 break;
2161 default: llvm_unreachable("Unhandled binary operator");
2162 }
2163
2164 // Only print parens around the LHS if it is non-trivial.
2165 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2166 printMCExpr(*BE.getRHS(), OS);
2167 } else {
2168 OS << '(';
2169 printMCExpr(*BE.getRHS(), OS);
2170 OS << ')';
2171 }
2172 return;
2173 }
2174 }
2175
2176 llvm_unreachable("Invalid expression kind!");
2177}
2178
2179/// PrintAsmOperand - Print out an operand for an inline asm expression.
2180///
2181bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2182 const char *ExtraCode, raw_ostream &O) {
2183 if (ExtraCode && ExtraCode[0]) {
2184 if (ExtraCode[1] != 0)
2185 return true; // Unknown modifier.
2186
2187 switch (ExtraCode[0]) {
2188 default:
2189 // See if this is a generic print operand
2190 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2191 case 'r':
2192 break;
2193 }
2194 }
2195
2196 printOperand(MI, OpNo, O);
2197
2198 return false;
2199}
2200
2201bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2202 unsigned OpNo,
2203 const char *ExtraCode,
2204 raw_ostream &O) {
2205 if (ExtraCode && ExtraCode[0])
2206 return true; // Unknown modifier
2207
2208 O << '[';
2209 printMemOperand(MI, OpNo, O);
2210 O << ']';
2211
2212 return false;
2213}
2214
// Prints operand OpNum of MI to O, choosing the form by the operand's type:
// register, immediate, floating-point immediate, symbol or basic block. Any
// other operand type is treated as unreachable.
// NOTE(review): every `case` label of the switch other than `default:`, and
// both statements of the physical-register branch, are missing from this
// listing (embedded lines 2219, 2222, 2224, 2230, 2234, 2238 and 2242) —
// confirm which operand type selects each branch against the upstream file.
2215void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
2216 raw_ostream &O) {
2217 const MachineOperand &MO = MI->getOperand(OpNum);
2218 switch (MO.getType()) {
2220 if (MO.getReg().isPhysical()) {
2221 if (MO.getReg() == NVPTX::VRDepot)
2223 else
2225 } else {
2226 emitVirtualRegister(MO.getReg(), O);
2227 }
2228 break;
2229
2231 O << MO.getImm();
2232 break;
2233
2235 printFPConstant(MO.getFPImm(), O);
2236 break;
2237
2239 PrintSymbolOperand(MO, O);
2240 break;
2241
2243 MO.getMBB()->getSymbol()->print(O, MAI);
2244 break;
2245
2246 default:
2247 llvm_unreachable("Operand type not supported.");
2248 }
2249}
2250
2251void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
2252 raw_ostream &O, const char *Modifier) {
2253 printOperand(MI, OpNum, O);
2254
2255 if (Modifier && strcmp(Modifier, "add") == 0) {
2256 O << ", ";
2257 printOperand(MI, OpNum + 1, O);
2258 } else {
2259 if (MI->getOperand(OpNum + 1).isImm() &&
2260 MI->getOperand(OpNum + 1).getImm() == 0)
2261 return; // don't print ',0' or '+0'
2262 O << "+";
2263 printOperand(MI, OpNum + 1, O);
2264 }
2265}
2266
2267// Force static initialization.
2271}
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:135
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Looks at all the uses of the given value Returns the Liveness deduced from the uses of this value Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses If the result is MaybeLiveUses might be modified but its content should be ignored(since it might not be complete). DeadArgumentEliminationPass
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
Symbol * Sym
Definition: ELF_riscv.cpp:479
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
static bool isEmptyXXStructor(GlobalVariable *GV)
#define DEPOTNAME
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
static bool usedInGlobalVarDef(const Constant *C)
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
static cl::opt< bool > LowerCtorDtor("nvptx-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(false), cl::Hidden)
static bool ShouldPassAsArray(Type *Ty)
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const char * name
Definition: SMEABIPass.cpp:49
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5191
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:704
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:419
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:87
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:90
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:102
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:450
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:395
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:121
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:105
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:94
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCSymbol * GetExternalSymbolSymbol(Twine Sym) const
Return the MCSymbol for the specified ExternalSymbol.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:395
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:99
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:403
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:423
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:414
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
ConstantArray - Constant Array Declarations.
Definition: Constants.h:423
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:583
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1017
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValueAPF() const
Definition: Constants.h:311
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This is an important base class in LLVM.
Definition: Constant.h:41
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Subprogram description.
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1830
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:568
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:118
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:110
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:514
bool hasExternalLinkage() const
Definition: GlobalValue.h:510
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:286
bool hasLocalLinkage() const
Definition: GlobalValue.h:527
bool hasPrivateLinkage() const
Definition: GlobalValue.h:526
unsigned getAddressSpace() const
Definition: GlobalValue.h:204
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:293
bool hasWeakLinkage() const
Definition: GlobalValue.h:521
bool hasCommonLinkage() const
Definition: GlobalValue.h:531
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:511
Type * getValueType() const
Definition: GlobalValue.h:295
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
Definition: MCAsmInfo.h:546
Binary assembler expressions.
Definition: MCExpr.h:492
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:639
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:642
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:541
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:536
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:636
@ Add
Addition.
Definition: MCExpr.h:495
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
Context object for machine code objects.
Definition: MCContext.h:81
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:201
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
@ Unary
Unary expressions.
Definition: MCExpr.h:41
@ Constant
Constant expressions.
Definition: MCExpr.h:39
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
@ Target
Target specific expression.
Definition: MCExpr.h:42
@ Binary
Binary expressions.
Definition: MCExpr.h:38
ExprKind getKind() const
Definition: MCExpr.h:81
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
const MCSymbol & getSymbol() const
Definition: MCExpr.h:410
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:40
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:58
Unary assembler expressions.
Definition: MCExpr.h:436
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:479
@ Minus
Unary minus.
Definition: MCExpr.h:440
@ Plus
Unary plus.
Definition: MCExpr.h:442
@ Not
Bitwise negation.
Definition: MCExpr.h:441
@ LNot
Logical negation.
Definition: MCExpr.h:439
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:482
Metadata node.
Definition: Metadata.h:1067
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
bool hasDebugInfo() const
Returns true if valid debug info is present.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:59
static const char * getRegisterName(MCRegister Reg)
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
bool hasImageHandles() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
UniqueStringSaver & getStrPool() const
Implements NVPTX-specific streamer.
void closeLastSection()
Close last section.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:679
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:258
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Class to represent struct types.
Definition: DerivedTypes.h:216
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
unsigned getPointerSizeInBits(unsigned AS) const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:146
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition: Type.h:75
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ VoidTyID
type with no size
Definition: Type.h:63
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ StructTyID
Structures.
Definition: Type.h:74
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition: Type.h:57
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ PointerTyID
Pointers.
Definition: Type.h:73
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:295
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:243
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
StringRef save(const char *S)
Definition: StringSaver.h:52
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool erase(const ValueT &V)
Definition: DenseSet.h:101
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:690
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ NVCL
Definition: NVPTX.h:79
@ CUDA
Definition: NVPTX.h:80
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
uint64_t read64le(const void *P)
Definition: Endian.h:427
uint32_t read32le(const void *P)
Definition: Endian.h:424
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
std::string getSamplerName(const Value &val)
AddressSpace
Definition: NVPTXBaseInfo.h:21
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
bool getMinCTASm(const Function &F, unsigned &x)
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isImage(const Value &val)
MaybeAlign getAlign(const Function &F, unsigned Index)
bool getMaxNTIDz(const Function &F, unsigned &z)
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
bool isManaged(const Value &val)
unsigned promoteScalarArgumentSize(unsigned size)
bool isSurface(const Value &val)
void clearAnnotationCache(const Module *Mod)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
std::string getSurfaceName(const Value &val)
bool getReqNTIDx(const Function &F, unsigned &x)
bool getReqNTIDy(const Function &F, unsigned &y)
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:200
bool getMaxNReg(const Function &F, unsigned &x)
bool isTexture(const Value &val)
bool isImageWriteOnly(const Value &val)
bool isImageReadWrite(const Value &val)
void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
std::string getTextureName(const Value &val)
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
bool getReqNTIDz(const Function &F, unsigned &z)
bool getMaxNTIDx(const Function &F, unsigned &x)
bool getMaxNTIDy(const Function &F, unsigned &y)
bool isSampler(const Value &val)
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool getMaxClusterRank(const Function &F, unsigned &x)
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Target & getTheNVPTXTarget32()
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...