LLVM 19.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/DenseSet.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/Twine.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/BasicBlock.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/Constants.h"
54#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/DebugLoc.h"
59#include "llvm/IR/Function.h"
60#include "llvm/IR/GlobalAlias.h"
61#include "llvm/IR/GlobalValue.h"
63#include "llvm/IR/Instruction.h"
64#include "llvm/IR/LLVMContext.h"
65#include "llvm/IR/Module.h"
66#include "llvm/IR/Operator.h"
67#include "llvm/IR/Type.h"
68#include "llvm/IR/User.h"
69#include "llvm/MC/MCExpr.h"
70#include "llvm/MC/MCInst.h"
71#include "llvm/MC/MCInstrDesc.h"
72#include "llvm/MC/MCStreamer.h"
73#include "llvm/MC/MCSymbol.h"
77#include "llvm/Support/Endian.h"
80#include "llvm/Support/Path.h"
86#include <cassert>
87#include <cstdint>
88#include <cstring>
89#include <new>
90#include <string>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95
96static cl::opt<bool>
97 LowerCtorDtor("nvptx-lower-global-ctor-dtor",
98 cl::desc("Lower GPU ctor / dtors to globals on the device."),
99 cl::init(false), cl::Hidden);
100
101#define DEPOTNAME "__local_depot"
102
103/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
104/// depends.
105static void
108 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
109 Globals.insert(GV);
110 else {
111 if (const User *U = dyn_cast<User>(V)) {
112 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
113 DiscoverDependentGlobals(U->getOperand(i), Globals);
114 }
115 }
116 }
117}
118
119/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
120/// instances to be emitted, but only after any dependents have been added
121/// first.
122static void
127 // Have we already visited this one?
128 if (Visited.count(GV))
129 return;
130
131 // Do we have a circular dependency?
132 if (!Visiting.insert(GV).second)
133 report_fatal_error("Circular dependency found in global variable set");
134
135 // Make sure we visit all dependents first
137 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
138 DiscoverDependentGlobals(GV->getOperand(i), Others);
139
140 for (const GlobalVariable *GV : Others)
141 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
142
143 // Now we can visit ourself
144 Order.push_back(GV);
145 Visited.insert(GV);
146 Visiting.erase(GV);
147}
148
149void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
150 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
151 getSubtargetInfo().getFeatureBits());
152
153 MCInst Inst;
154 lowerToMCInst(MI, Inst);
156}
157
158// Handle symbol backtracking for targets that do not support image handles
159bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
160 unsigned OpNo, MCOperand &MCOp) {
161 const MachineOperand &MO = MI->getOperand(OpNo);
162 const MCInstrDesc &MCID = MI->getDesc();
163
164 if (MCID.TSFlags & NVPTXII::IsTexFlag) {
165 // This is a texture fetch, so operand 4 is a texref and operand 5 is
166 // a samplerref
167 if (OpNo == 4 && MO.isImm()) {
168 lowerImageHandleSymbol(MO.getImm(), MCOp);
169 return true;
170 }
171 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
172 lowerImageHandleSymbol(MO.getImm(), MCOp);
173 return true;
174 }
175
176 return false;
177 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
178 unsigned VecSize =
179 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
180
181 // For a surface load of vector size N, the Nth operand will be the surfref
182 if (OpNo == VecSize && MO.isImm()) {
183 lowerImageHandleSymbol(MO.getImm(), MCOp);
184 return true;
185 }
186
187 return false;
188 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
189 // This is a surface store, so operand 0 is a surfref
190 if (OpNo == 0 && MO.isImm()) {
191 lowerImageHandleSymbol(MO.getImm(), MCOp);
192 return true;
193 }
194
195 return false;
196 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
197 // This is a query, so operand 1 is a surfref/texref
198 if (OpNo == 1 && MO.isImm()) {
199 lowerImageHandleSymbol(MO.getImm(), MCOp);
200 return true;
201 }
202
203 return false;
204 }
205
206 return false;
207}
208
209void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
210 // Ewwww
212 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
214 const char *Sym = MFI->getImageHandleSymbol(Index);
215 StringRef SymName = nvTM.getStrPool().save(Sym);
216 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
217}
218
219void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
220 OutMI.setOpcode(MI->getOpcode());
221 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
222 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
223 const MachineOperand &MO = MI->getOperand(0);
224 OutMI.addOperand(GetSymbolRef(
226 return;
227 }
228
229 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
230 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
231 const MachineOperand &MO = MI->getOperand(i);
232
233 MCOperand MCOp;
234 if (!STI.hasImageHandles()) {
235 if (lowerImageHandleOperand(MI, i, MCOp)) {
236 OutMI.addOperand(MCOp);
237 continue;
238 }
239 }
240
241 if (lowerOperand(MO, MCOp))
242 OutMI.addOperand(MCOp);
243 }
244}
245
246bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
247 MCOperand &MCOp) {
248 switch (MO.getType()) {
249 default: llvm_unreachable("unknown operand type");
251 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
252 break;
254 MCOp = MCOperand::createImm(MO.getImm());
255 break;
258 MO.getMBB()->getSymbol(), OutContext));
259 break;
261 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
262 break;
264 MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
265 break;
267 const ConstantFP *Cnt = MO.getFPImm();
268 const APFloat &Val = Cnt->getValueAPF();
269
270 switch (Cnt->getType()->getTypeID()) {
271 default: report_fatal_error("Unsupported FP type"); break;
272 case Type::HalfTyID:
275 break;
276 case Type::BFloatTyID:
279 break;
280 case Type::FloatTyID:
283 break;
284 case Type::DoubleTyID:
287 break;
288 }
289 break;
290 }
291 }
292 return true;
293}
294
295unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
297 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
298
299 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
300 unsigned RegNum = RegMap[Reg];
301
302 // Encode the register class in the upper 4 bits
303 // Must be kept in sync with NVPTXInstPrinter::printRegName
304 unsigned Ret = 0;
305 if (RC == &NVPTX::Int1RegsRegClass) {
306 Ret = (1 << 28);
307 } else if (RC == &NVPTX::Int16RegsRegClass) {
308 Ret = (2 << 28);
309 } else if (RC == &NVPTX::Int32RegsRegClass) {
310 Ret = (3 << 28);
311 } else if (RC == &NVPTX::Int64RegsRegClass) {
312 Ret = (4 << 28);
313 } else if (RC == &NVPTX::Float32RegsRegClass) {
314 Ret = (5 << 28);
315 } else if (RC == &NVPTX::Float64RegsRegClass) {
316 Ret = (6 << 28);
317 } else {
318 report_fatal_error("Bad register class");
319 }
320
321 // Insert the vreg number
322 Ret |= (RegNum & 0x0FFFFFFF);
323 return Ret;
324 } else {
325 // Some special-use registers are actually physical registers.
326 // Encode this as the register class ID of 0 and the real register ID.
327 return Reg & 0x0FFFFFFF;
328 }
329}
330
331MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
332 const MCExpr *Expr;
334 OutContext);
335 return MCOperand::createExpr(Expr);
336}
337
338static bool ShouldPassAsArray(Type *Ty) {
339 return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) ||
340 Ty->isHalfTy() || Ty->isBFloatTy();
341}
342
343void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
344 const DataLayout &DL = getDataLayout();
346 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
347
348 Type *Ty = F->getReturnType();
349
350 bool isABI = (STI.getSmVersion() >= 20);
351
352 if (Ty->getTypeID() == Type::VoidTyID)
353 return;
354 O << " (";
355
356 if (isABI) {
357 if ((Ty->isFloatingPointTy() || Ty->isIntegerTy()) &&
358 !ShouldPassAsArray(Ty)) {
359 unsigned size = 0;
360 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
361 size = ITy->getBitWidth();
362 } else {
363 assert(Ty->isFloatingPointTy() && "Floating point type expected here");
365 }
367 O << ".param .b" << size << " func_retval0";
368 } else if (isa<PointerType>(Ty)) {
369 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
370 << " func_retval0";
371 } else if (ShouldPassAsArray(Ty)) {
372 unsigned totalsz = DL.getTypeAllocSize(Ty);
373 unsigned retAlignment = 0;
374 if (!getAlign(*F, 0, retAlignment))
375 retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value();
376 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
377 << "]";
378 } else
379 llvm_unreachable("Unknown return type");
380 } else {
381 SmallVector<EVT, 16> vtparts;
382 ComputeValueVTs(*TLI, DL, Ty, vtparts);
383 unsigned idx = 0;
384 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
385 unsigned elems = 1;
386 EVT elemtype = vtparts[i];
387 if (vtparts[i].isVector()) {
388 elems = vtparts[i].getVectorNumElements();
389 elemtype = vtparts[i].getVectorElementType();
390 }
391
392 for (unsigned j = 0, je = elems; j != je; ++j) {
393 unsigned sz = elemtype.getSizeInBits();
394 if (elemtype.isInteger())
396 O << ".reg .b" << sz << " func_retval" << idx;
397 if (j < je - 1)
398 O << ", ";
399 ++idx;
400 }
401 if (i < e - 1)
402 O << ", ";
403 }
404 }
405 O << ") ";
406}
407
408void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
409 raw_ostream &O) {
410 const Function &F = MF.getFunction();
411 printReturnValStr(&F, O);
412}
413
414// Return true if MBB is the header of a loop marked with
415// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
416bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
417 const MachineBasicBlock &MBB) const {
418 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
419 // We insert .pragma "nounroll" only to the loop header.
420 if (!LI.isLoopHeader(&MBB))
421 return false;
422
423 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
424 // we iterate through each back edge of the loop with header MBB, and check
425 // whether its metadata contains llvm.loop.unroll.disable.
426 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
427 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
428 // Edges from other loops to MBB are not back edges.
429 continue;
430 }
431 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
432 if (MDNode *LoopID =
433 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
434 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
435 return true;
436 if (MDNode *UnrollCountMD =
437 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
438 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
439 ->isOne())
440 return true;
441 }
442 }
443 }
444 }
445 return false;
446}
447
448void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
450 if (isLoopHeaderOfNoUnroll(MBB))
451 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
452}
453
454void NVPTXAsmPrinter::emitFunctionEntryLabel() {
457
458 if (!GlobalsEmitted) {
459 emitGlobals(*MF->getFunction().getParent());
460 GlobalsEmitted = true;
461 }
462
463 // Set up
464 MRI = &MF->getRegInfo();
465 F = &MF->getFunction();
466 emitLinkageDirective(F, O);
467 if (isKernelFunction(*F))
468 O << ".entry ";
469 else {
470 O << ".func ";
471 printReturnValStr(*MF, O);
472 }
473
475
476 emitFunctionParamList(F, O);
477 O << "\n";
478
479 if (isKernelFunction(*F))
480 emitKernelFunctionDirectives(*F, O);
481
483 O << ".noreturn";
484
485 OutStreamer->emitRawText(O.str());
486
487 VRegMapping.clear();
488 // Emit open brace for function body.
489 OutStreamer->emitRawText(StringRef("{\n"));
490 setAndEmitFunctionVirtualRegisters(*MF);
491 // Emit initial .loc debug directive for correct relocation symbol data.
492 if (MMI && MMI->hasDebugInfo())
494}
495
497 bool Result = AsmPrinter::runOnMachineFunction(F);
498 // Emit closing brace for the body of function F.
499 // The closing brace must be emitted here because we need to emit additional
500 // debug labels/data after the last basic block.
501 // We need to emit the closing brace here because we don't have function that
502 // finished emission of the function body.
503 OutStreamer->emitRawText(StringRef("}\n"));
504 return Result;
505}
506
507void NVPTXAsmPrinter::emitFunctionBodyStart() {
509 raw_svector_ostream O(Str);
510 emitDemotedVars(&MF->getFunction(), O);
511 OutStreamer->emitRawText(O.str());
512}
513
514void NVPTXAsmPrinter::emitFunctionBodyEnd() {
515 VRegMapping.clear();
516}
517
521 return OutContext.getOrCreateSymbol(Str);
522}
523
524void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
525 Register RegNo = MI->getOperand(0).getReg();
526 if (RegNo.isVirtual()) {
527 OutStreamer->AddComment(Twine("implicit-def: ") +
529 } else {
530 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
531 OutStreamer->AddComment(Twine("implicit-def: ") +
532 STI.getRegisterInfo()->getName(RegNo));
533 }
534 OutStreamer->addBlankLine();
535}
536
537void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
538 raw_ostream &O) const {
539 // If the NVVM IR has some of reqntid* specified, then output
540 // the reqntid directive, and set the unspecified ones to 1.
541 // If none of Reqntid* is specified, don't output reqntid directive.
542 unsigned Reqntidx, Reqntidy, Reqntidz;
543 Reqntidx = Reqntidy = Reqntidz = 1;
544 bool ReqSpecified = false;
545 ReqSpecified |= getReqNTIDx(F, Reqntidx);
546 ReqSpecified |= getReqNTIDy(F, Reqntidy);
547 ReqSpecified |= getReqNTIDz(F, Reqntidz);
548
549 if (ReqSpecified)
550 O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz
551 << "\n";
552
553 // If the NVVM IR has some of maxntid* specified, then output
554 // the maxntid directive, and set the unspecified ones to 1.
555 // If none of maxntid* is specified, don't output maxntid directive.
556 unsigned Maxntidx, Maxntidy, Maxntidz;
557 Maxntidx = Maxntidy = Maxntidz = 1;
558 bool MaxSpecified = false;
559 MaxSpecified |= getMaxNTIDx(F, Maxntidx);
560 MaxSpecified |= getMaxNTIDy(F, Maxntidy);
561 MaxSpecified |= getMaxNTIDz(F, Maxntidz);
562
563 if (MaxSpecified)
564 O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz
565 << "\n";
566
567 unsigned Mincta = 0;
568 if (getMinCTASm(F, Mincta))
569 O << ".minnctapersm " << Mincta << "\n";
570
571 unsigned Maxnreg = 0;
572 if (getMaxNReg(F, Maxnreg))
573 O << ".maxnreg " << Maxnreg << "\n";
574
575 // .maxclusterrank directive requires SM_90 or higher, make sure that we
576 // filter it out for lower SM versions, as it causes a hard ptxas crash.
577 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
578 const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
579 unsigned Maxclusterrank = 0;
580 if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90)
581 O << ".maxclusterrank " << Maxclusterrank << "\n";
582}
583
584std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
585 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
586
587 std::string Name;
588 raw_string_ostream NameStr(Name);
589
590 VRegRCMap::const_iterator I = VRegMapping.find(RC);
591 assert(I != VRegMapping.end() && "Bad register class");
592 const DenseMap<unsigned, unsigned> &RegMap = I->second;
593
594 VRegMap::const_iterator VI = RegMap.find(Reg);
595 assert(VI != RegMap.end() && "Bad virtual register");
596 unsigned MappedVR = VI->second;
597
598 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
599
600 NameStr.flush();
601 return Name;
602}
603
604void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
605 raw_ostream &O) {
606 O << getVirtualRegisterName(vr);
607}
608
609void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
610 raw_ostream &O) {
611 const Function *F = dyn_cast_or_null<Function>(GA->getAliaseeObject());
612 if (!F || isKernelFunction(*F) || F->isDeclaration())
614 "NVPTX aliasee must be a non-kernel function definition");
615
616 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
618 report_fatal_error("NVPTX aliasee must not be '.weak'");
619
620 emitDeclarationWithName(F, getSymbol(GA), O);
621}
622
623void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
624 emitDeclarationWithName(F, getSymbol(F), O);
625}
626
627void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
628 raw_ostream &O) {
629 emitLinkageDirective(F, O);
630 if (isKernelFunction(*F))
631 O << ".entry ";
632 else
633 O << ".func ";
634 printReturnValStr(F, O);
635 S->print(O, MAI);
636 O << "\n";
637 emitFunctionParamList(F, O);
638 O << "\n";
640 O << ".noreturn";
641 O << ";\n";
642}
643
644static bool usedInGlobalVarDef(const Constant *C) {
645 if (!C)
646 return false;
647
648 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
649 return GV->getName() != "llvm.used";
650 }
651
652 for (const User *U : C->users())
653 if (const Constant *C = dyn_cast<Constant>(U))
655 return true;
656
657 return false;
658}
659
660static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
661 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
662 if (othergv->getName() == "llvm.used")
663 return true;
664 }
665
666 if (const Instruction *instr = dyn_cast<Instruction>(U)) {
667 if (instr->getParent() && instr->getParent()->getParent()) {
668 const Function *curFunc = instr->getParent()->getParent();
669 if (oneFunc && (curFunc != oneFunc))
670 return false;
671 oneFunc = curFunc;
672 return true;
673 } else
674 return false;
675 }
676
677 for (const User *UU : U->users())
678 if (!usedInOneFunc(UU, oneFunc))
679 return false;
680
681 return true;
682}
683
684/* Find out if a global variable can be demoted to local scope.
685 * Currently, this is valid for CUDA shared variables, which have local
686 * scope and global lifetime. So the conditions to check are :
687 * 1. Is the global variable in shared address space?
688 * 2. Does it have local linkage?
689 * 3. Is the global variable referenced only in one function?
690 */
691static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
692 if (!gv->hasLocalLinkage())
693 return false;
694 PointerType *Pty = gv->getType();
695 if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
696 return false;
697
698 const Function *oneFunc = nullptr;
699
700 bool flag = usedInOneFunc(gv, oneFunc);
701 if (!flag)
702 return false;
703 if (!oneFunc)
704 return false;
705 f = oneFunc;
706 return true;
707}
708
709static bool useFuncSeen(const Constant *C,
711 for (const User *U : C->users()) {
712 if (const Constant *cu = dyn_cast<Constant>(U)) {
713 if (useFuncSeen(cu, seenMap))
714 return true;
715 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
716 const BasicBlock *bb = I->getParent();
717 if (!bb)
718 continue;
719 const Function *caller = bb->getParent();
720 if (!caller)
721 continue;
722 if (seenMap.contains(caller))
723 return true;
724 }
725 }
726 return false;
727}
728
729void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
731 for (const Function &F : M) {
732 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
733 emitDeclaration(&F, O);
734 continue;
735 }
736
737 if (F.isDeclaration()) {
738 if (F.use_empty())
739 continue;
740 if (F.getIntrinsicID())
741 continue;
742 emitDeclaration(&F, O);
743 continue;
744 }
745 for (const User *U : F.users()) {
746 if (const Constant *C = dyn_cast<Constant>(U)) {
747 if (usedInGlobalVarDef(C)) {
748 // The use is in the initialization of a global variable
749 // that is a function pointer, so print a declaration
750 // for the original function
751 emitDeclaration(&F, O);
752 break;
753 }
754 // Emit a declaration of this function if the function that
755 // uses this constant expr has already been seen.
756 if (useFuncSeen(C, seenMap)) {
757 emitDeclaration(&F, O);
758 break;
759 }
760 }
761
762 if (!isa<Instruction>(U))
763 continue;
764 const Instruction *instr = cast<Instruction>(U);
765 const BasicBlock *bb = instr->getParent();
766 if (!bb)
767 continue;
768 const Function *caller = bb->getParent();
769 if (!caller)
770 continue;
771
772 // If a caller has already been seen, then the caller is
773 // appearing in the module before the callee. so print out
774 // a declaration for the callee.
775 if (seenMap.contains(caller)) {
776 emitDeclaration(&F, O);
777 break;
778 }
779 }
780 seenMap[&F] = true;
781 }
782 for (const GlobalAlias &GA : M.aliases())
783 emitAliasDeclaration(&GA, O);
784}
785
787 if (!GV) return true;
788 const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
789 if (!InitList) return true; // Not an array; we don't know how to parse.
790 return InitList->getNumOperands() == 0;
791}
792
793void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
794 // Construct a default subtarget off of the TargetMachine defaults. The
795 // rest of NVPTX isn't friendly to change subtargets per function and
796 // so the default TargetMachine will have all of the options.
797 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
798 const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
799 SmallString<128> Str1;
800 raw_svector_ostream OS1(Str1);
801
802 // Emit header before any dwarf directives are emitted below.
803 emitHeader(M, OS1, *STI);
804 OutStreamer->emitRawText(OS1.str());
805}
806
808 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
809 const NVPTXSubtarget &STI =
810 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
811 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
812 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
813
814 // OpenMP supports NVPTX global constructors and destructors.
815 bool IsOpenMP = M.getModuleFlag("openmp") != nullptr;
816
817 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
818 !LowerCtorDtor && !IsOpenMP) {
820 "Module has a nontrivial global ctor, which NVPTX does not support.");
821 return true; // error
822 }
823 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
824 !LowerCtorDtor && !IsOpenMP) {
826 "Module has a nontrivial global dtor, which NVPTX does not support.");
827 return true; // error
828 }
829
830 // We need to call the parent's one explicitly.
831 bool Result = AsmPrinter::doInitialization(M);
832
833 GlobalsEmitted = false;
834
835 return Result;
836}
837
838void NVPTXAsmPrinter::emitGlobals(const Module &M) {
839 SmallString<128> Str2;
840 raw_svector_ostream OS2(Str2);
841
842 emitDeclarations(M, OS2);
843
844 // As ptxas does not support forward references of globals, we need to first
845 // sort the list of module-level globals in def-use order. We visit each
846 // global variable in order, and ensure that we emit it *after* its dependent
847 // globals. We use a little extra memory maintaining both a set and a list to
848 // have fast searches while maintaining a strict ordering.
852
853 // Visit each global variable, in order
854 for (const GlobalVariable &I : M.globals())
855 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
856
857 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
858 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
859
860 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
861 const NVPTXSubtarget &STI =
862 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
863
864 // Print out module-level global variables in proper order
865 for (unsigned i = 0, e = Globals.size(); i != e; ++i)
866 printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
867
868 OS2 << '\n';
869
870 OutStreamer->emitRawText(OS2.str());
871}
872
873void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
876
877 MCSymbol *Name = getSymbol(&GA);
878
879 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
880 << ";\n";
881
882 OutStreamer->emitRawText(OS.str());
883}
884
885void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
886 const NVPTXSubtarget &STI) {
887 O << "//\n";
888 O << "// Generated by LLVM NVPTX Back-End\n";
889 O << "//\n";
890 O << "\n";
891
892 unsigned PTXVersion = STI.getPTXVersion();
893 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
894
895 O << ".target ";
896 O << STI.getTargetName();
897
898 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
899 if (NTM.getDrvInterface() == NVPTX::NVCL)
900 O << ", texmode_independent";
901
902 bool HasFullDebugInfo = false;
903 for (DICompileUnit *CU : M.debug_compile_units()) {
904 switch(CU->getEmissionKind()) {
907 break;
910 HasFullDebugInfo = true;
911 break;
912 }
913 if (HasFullDebugInfo)
914 break;
915 }
916 if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
917 O << ", debug";
918
919 O << "\n";
920
921 O << ".address_size ";
922 if (NTM.is64Bit())
923 O << "64";
924 else
925 O << "32";
926 O << "\n";
927
928 O << "\n";
929}
930
932 bool HasDebugInfo = MMI && MMI->hasDebugInfo();
933
934 // If we did not emit any functions, then the global declarations have not
935 // yet been emitted.
936 if (!GlobalsEmitted) {
937 emitGlobals(M);
938 GlobalsEmitted = true;
939 }
940
941 // call doFinalization
942 bool ret = AsmPrinter::doFinalization(M);
943
945
946 auto *TS =
947 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
948 // Close the last emitted section
949 if (HasDebugInfo) {
950 TS->closeLastSection();
951 // Emit empty .debug_loc section for better support of the empty files.
952 OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
953 }
954
955 // Output last DWARF .file directives, if any.
956 TS->outputDwarfFileDirectives();
957
958 return ret;
959}
960
961// This function emits appropriate linkage directives for
962// functions and global variables.
963//
964// extern function declaration -> .extern
965// extern function definition -> .visible
966// external global variable with init -> .visible
967// external without init -> .extern
968// appending -> not allowed, assert.
969// for any linkage other than
970// internal, private, linker_private,
971// linker_private_weak, linker_private_weak_def_auto,
972// we emit -> .weak.
973
974void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
975 raw_ostream &O) {
976 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
977 if (V->hasExternalLinkage()) {
978 if (isa<GlobalVariable>(V)) {
979 const GlobalVariable *GVar = cast<GlobalVariable>(V);
980 if (GVar) {
981 if (GVar->hasInitializer())
982 O << ".visible ";
983 else
984 O << ".extern ";
985 }
986 } else if (V->isDeclaration())
987 O << ".extern ";
988 else
989 O << ".visible ";
990 } else if (V->hasAppendingLinkage()) {
991 std::string msg;
992 msg.append("Error: ");
993 msg.append("Symbol ");
994 if (V->hasName())
995 msg.append(std::string(V->getName()));
996 msg.append("has unsupported appending linkage type");
997 llvm_unreachable(msg.c_str());
998 } else if (!V->hasInternalLinkage() &&
999 !V->hasPrivateLinkage()) {
1000 O << ".weak ";
1001 }
1002 }
1003}
1004
1005void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1006 raw_ostream &O, bool processDemoted,
1007 const NVPTXSubtarget &STI) {
1008 // Skip meta data
1009 if (GVar->hasSection()) {
1010 if (GVar->getSection() == "llvm.metadata")
1011 return;
1012 }
1013
1014 // Skip LLVM intrinsic global variables
1015 if (GVar->getName().starts_with("llvm.") ||
1016 GVar->getName().starts_with("nvvm."))
1017 return;
1018
1019 const DataLayout &DL = getDataLayout();
1020
1021 // GlobalVariables are always constant pointers themselves.
1022 PointerType *PTy = GVar->getType();
1023 Type *ETy = GVar->getValueType();
1024
1025 if (GVar->hasExternalLinkage()) {
1026 if (GVar->hasInitializer())
1027 O << ".visible ";
1028 else
1029 O << ".extern ";
1030 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1032 GVar->hasCommonLinkage()) {
1033 O << ".weak ";
1034 }
1035
1036 if (isTexture(*GVar)) {
1037 O << ".global .texref " << getTextureName(*GVar) << ";\n";
1038 return;
1039 }
1040
1041 if (isSurface(*GVar)) {
1042 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1043 return;
1044 }
1045
1046 if (GVar->isDeclaration()) {
1047 // (extern) declarations, no definition or initializer
1048 // Currently the only known declaration is for an automatic __local
1049 // (.shared) promoted to global.
1050 emitPTXGlobalVariable(GVar, O, STI);
1051 O << ";\n";
1052 return;
1053 }
1054
1055 if (isSampler(*GVar)) {
1056 O << ".global .samplerref " << getSamplerName(*GVar);
1057
1058 const Constant *Initializer = nullptr;
1059 if (GVar->hasInitializer())
1060 Initializer = GVar->getInitializer();
1061 const ConstantInt *CI = nullptr;
1062 if (Initializer)
1063 CI = dyn_cast<ConstantInt>(Initializer);
1064 if (CI) {
1065 unsigned sample = CI->getZExtValue();
1066
1067 O << " = { ";
1068
1069 for (int i = 0,
1070 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1071 i < 3; i++) {
1072 O << "addr_mode_" << i << " = ";
1073 switch (addr) {
1074 case 0:
1075 O << "wrap";
1076 break;
1077 case 1:
1078 O << "clamp_to_border";
1079 break;
1080 case 2:
1081 O << "clamp_to_edge";
1082 break;
1083 case 3:
1084 O << "wrap";
1085 break;
1086 case 4:
1087 O << "mirror";
1088 break;
1089 }
1090 O << ", ";
1091 }
1092 O << "filter_mode = ";
1093 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1094 case 0:
1095 O << "nearest";
1096 break;
1097 case 1:
1098 O << "linear";
1099 break;
1100 case 2:
1101 llvm_unreachable("Anisotropic filtering is not supported");
1102 default:
1103 O << "nearest";
1104 break;
1105 }
1106 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1107 O << ", force_unnormalized_coords = 1";
1108 }
1109 O << " }";
1110 }
1111
1112 O << ";\n";
1113 return;
1114 }
1115
1116 if (GVar->hasPrivateLinkage()) {
1117 if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1118 return;
1119
1120 // FIXME - need better way (e.g. Metadata) to avoid generating this global
1121 if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1122 return;
1123 if (GVar->use_empty())
1124 return;
1125 }
1126
1127 const Function *demotedFunc = nullptr;
1128 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1129 O << "// " << GVar->getName() << " has been demoted\n";
1130 if (localDecls.find(demotedFunc) != localDecls.end())
1131 localDecls[demotedFunc].push_back(GVar);
1132 else {
1133 std::vector<const GlobalVariable *> temp;
1134 temp.push_back(GVar);
1135 localDecls[demotedFunc] = temp;
1136 }
1137 return;
1138 }
1139
1140 O << ".";
1141 emitPTXAddressSpace(PTy->getAddressSpace(), O);
1142
1143 if (isManaged(*GVar)) {
1144 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1146 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1147 }
1148 O << " .attribute(.managed)";
1149 }
1150
1151 if (MaybeAlign A = GVar->getAlign())
1152 O << " .align " << A->value();
1153 else
1154 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1155
1156 if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1157 (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1158 O << " .";
1159 // Special case: ABI requires that we use .u8 for predicates
1160 if (ETy->isIntegerTy(1))
1161 O << "u8";
1162 else
1163 O << getPTXFundamentalTypeStr(ETy, false);
1164 O << " ";
1165 getSymbol(GVar)->print(O, MAI);
1166
1167 // PTX allows variable initialization only for the constant and global
1168 // state spaces.
1169 if (GVar->hasInitializer()) {
1170 if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1171 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1172 const Constant *Initializer = GVar->getInitializer();
1173 // 'undef' is treated as there is no value specified.
1174 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1175 O << " = ";
1176 printScalarConstant(Initializer, O);
1177 }
1178 } else {
1179 // The frontend adds zero-initializer to device and constant variables
1180 // that don't have an initial value, and UndefValue to shared
1181 // variables, so skip warning for this case.
1182 if (!GVar->getInitializer()->isNullValue() &&
1183 !isa<UndefValue>(GVar->getInitializer())) {
1184 report_fatal_error("initial value of '" + GVar->getName() +
1185 "' is not allowed in addrspace(" +
1186 Twine(PTy->getAddressSpace()) + ")");
1187 }
1188 }
1189 }
1190 } else {
1191 uint64_t ElementSize = 0;
1192
1193 // Although PTX has direct support for struct type and array type and
1194 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1195 // targets that support these high level field accesses. Structs, arrays
1196 // and vectors are lowered into arrays of bytes.
1197 switch (ETy->getTypeID()) {
1198 case Type::IntegerTyID: // Integers larger than 64 bits
1199 case Type::StructTyID:
1200 case Type::ArrayTyID:
1202 ElementSize = DL.getTypeStoreSize(ETy);
1203 // PTX allows variable initialization only for the constant and
1204 // global state spaces.
1205 if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1206 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1207 GVar->hasInitializer()) {
1208 const Constant *Initializer = GVar->getInitializer();
1209 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1210 AggBuffer aggBuffer(ElementSize, *this);
1211 bufferAggregateConstant(Initializer, &aggBuffer);
1212 if (aggBuffer.numSymbols()) {
1213 unsigned int ptrSize = MAI->getCodePointerSize();
1214 if (ElementSize % ptrSize ||
1215 !aggBuffer.allSymbolsAligned(ptrSize)) {
1216 // Print in bytes and use the mask() operator for pointers.
1217 if (!STI.hasMaskOperator())
1219 "initialized packed aggregate with pointers '" +
1220 GVar->getName() +
1221 "' requires at least PTX ISA version 7.1");
1222 O << " .u8 ";
1223 getSymbol(GVar)->print(O, MAI);
1224 O << "[" << ElementSize << "] = {";
1225 aggBuffer.printBytes(O);
1226 O << "}";
1227 } else {
1228 O << " .u" << ptrSize * 8 << " ";
1229 getSymbol(GVar)->print(O, MAI);
1230 O << "[" << ElementSize / ptrSize << "] = {";
1231 aggBuffer.printWords(O);
1232 O << "}";
1233 }
1234 } else {
1235 O << " .b8 ";
1236 getSymbol(GVar)->print(O, MAI);
1237 O << "[" << ElementSize << "] = {";
1238 aggBuffer.printBytes(O);
1239 O << "}";
1240 }
1241 } else {
1242 O << " .b8 ";
1243 getSymbol(GVar)->print(O, MAI);
1244 if (ElementSize) {
1245 O << "[";
1246 O << ElementSize;
1247 O << "]";
1248 }
1249 }
1250 } else {
1251 O << " .b8 ";
1252 getSymbol(GVar)->print(O, MAI);
1253 if (ElementSize) {
1254 O << "[";
1255 O << ElementSize;
1256 O << "]";
1257 }
1258 }
1259 break;
1260 default:
1261 llvm_unreachable("type not supported yet");
1262 }
1263 }
1264 O << ";\n";
1265}
1266
1267void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1268 const Value *v = Symbols[nSym];
1269 const Value *v0 = SymbolsBeforeStripping[nSym];
1270 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1271 MCSymbol *Name = AP.getSymbol(GVar);
1272 PointerType *PTy = dyn_cast<PointerType>(v0->getType());
1273 // Is v0 a generic pointer?
1274 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1275 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1276 os << "generic(";
1277 Name->print(os, AP.MAI);
1278 os << ")";
1279 } else {
1280 Name->print(os, AP.MAI);
1281 }
1282 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1283 const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
1284 AP.printMCExpr(*Expr, os);
1285 } else
1286 llvm_unreachable("symbol type unknown");
1287}
1288
1289void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1290 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1291 // Do not emit trailing zero initializers. They will be zero-initialized by
1292 // ptxas. This saves on both space requirements for the generated PTX and on
1293 // memory use by ptxas. (See:
1294 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1295 unsigned int InitializerCount = size;
1296 // TODO: symbols make this harder, but it would still be good to trim trailing
1297 // 0s for aggs with symbols as well.
1298 if (numSymbols() == 0)
1299 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1300 InitializerCount--;
1301
1302 symbolPosInBuffer.push_back(InitializerCount);
1303 unsigned int nSym = 0;
1304 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1305 for (unsigned int pos = 0; pos < InitializerCount;) {
1306 if (pos)
1307 os << ", ";
1308 if (pos != nextSymbolPos) {
1309 os << (unsigned int)buffer[pos];
1310 ++pos;
1311 continue;
1312 }
1313 // Generate a per-byte mask() operator for the symbol, which looks like:
1314 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1315 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1316 std::string symText;
1317 llvm::raw_string_ostream oss(symText);
1318 printSymbol(nSym, oss);
1319 for (unsigned i = 0; i < ptrSize; ++i) {
1320 if (i)
1321 os << ", ";
1322 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1323 os << "(" << symText << ")";
1324 }
1325 pos += ptrSize;
1326 nextSymbolPos = symbolPosInBuffer[++nSym];
1327 assert(nextSymbolPos >= pos);
1328 }
1329}
1330
1331void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1332 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1333 symbolPosInBuffer.push_back(size);
1334 unsigned int nSym = 0;
1335 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1336 assert(nextSymbolPos % ptrSize == 0);
1337 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1338 if (pos)
1339 os << ", ";
1340 if (pos == nextSymbolPos) {
1341 printSymbol(nSym, os);
1342 nextSymbolPos = symbolPosInBuffer[++nSym];
1343 assert(nextSymbolPos % ptrSize == 0);
1344 assert(nextSymbolPos >= pos + ptrSize);
1345 } else if (ptrSize == 4)
1346 os << support::endian::read32le(&buffer[pos]);
1347 else
1348 os << support::endian::read64le(&buffer[pos]);
1349 }
1350}
1351
1352void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1353 if (localDecls.find(f) == localDecls.end())
1354 return;
1355
1356 std::vector<const GlobalVariable *> &gvars = localDecls[f];
1357
1358 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1359 const NVPTXSubtarget &STI =
1360 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1361
1362 for (const GlobalVariable *GV : gvars) {
1363 O << "\t// demoted variable\n\t";
1364 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1365 }
1366}
1367
1368void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1369 raw_ostream &O) const {
1370 switch (AddressSpace) {
1372 O << "local";
1373 break;
1375 O << "global";
1376 break;
1378 O << "const";
1379 break;
1381 O << "shared";
1382 break;
1383 default:
1384 report_fatal_error("Bad address space found while emitting PTX: " +
1386 break;
1387 }
1388}
1389
1390std::string
1391NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1392 switch (Ty->getTypeID()) {
1393 case Type::IntegerTyID: {
1394 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1395 if (NumBits == 1)
1396 return "pred";
1397 else if (NumBits <= 64) {
1398 std::string name = "u";
1399 return name + utostr(NumBits);
1400 } else {
1401 llvm_unreachable("Integer too large");
1402 break;
1403 }
1404 break;
1405 }
1406 case Type::BFloatTyID:
1407 case Type::HalfTyID:
1408 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1409 // PTX assembly.
1410 return "b16";
1411 case Type::FloatTyID:
1412 return "f32";
1413 case Type::DoubleTyID:
1414 return "f64";
1415 case Type::PointerTyID: {
1416 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1417 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1418
1419 if (PtrSize == 64)
1420 if (useB4PTR)
1421 return "b64";
1422 else
1423 return "u64";
1424 else if (useB4PTR)
1425 return "b32";
1426 else
1427 return "u32";
1428 }
1429 default:
1430 break;
1431 }
1432 llvm_unreachable("unexpected type");
1433}
1434
1435void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1436 raw_ostream &O,
1437 const NVPTXSubtarget &STI) {
1438 const DataLayout &DL = getDataLayout();
1439
1440 // GlobalVariables are always constant pointers themselves.
1441 Type *ETy = GVar->getValueType();
1442
1443 O << ".";
1444 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1445 if (isManaged(*GVar)) {
1446 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1448 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1449 }
1450 O << " .attribute(.managed)";
1451 }
1452 if (MaybeAlign A = GVar->getAlign())
1453 O << " .align " << A->value();
1454 else
1455 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1456
1457 // Special case for i128
1458 if (ETy->isIntegerTy(128)) {
1459 O << " .b8 ";
1460 getSymbol(GVar)->print(O, MAI);
1461 O << "[16]";
1462 return;
1463 }
1464
1465 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1466 O << " .";
1467 O << getPTXFundamentalTypeStr(ETy);
1468 O << " ";
1469 getSymbol(GVar)->print(O, MAI);
1470 return;
1471 }
1472
1473 int64_t ElementSize = 0;
1474
1475 // Although PTX has direct support for struct type and array type and LLVM IR
1476 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1477 // support these high level field accesses. Structs and arrays are lowered
1478 // into arrays of bytes.
1479 switch (ETy->getTypeID()) {
1480 case Type::StructTyID:
1481 case Type::ArrayTyID:
1483 ElementSize = DL.getTypeStoreSize(ETy);
1484 O << " .b8 ";
1485 getSymbol(GVar)->print(O, MAI);
1486 O << "[";
1487 if (ElementSize) {
1488 O << ElementSize;
1489 }
1490 O << "]";
1491 break;
1492 default:
1493 llvm_unreachable("type not supported yet");
1494 }
1495}
1496
1497void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1498 const DataLayout &DL = getDataLayout();
1499 const AttributeList &PAL = F->getAttributes();
1500 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1501 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1502
1504 unsigned paramIndex = 0;
1505 bool first = true;
1506 bool isKernelFunc = isKernelFunction(*F);
1507 bool isABI = (STI.getSmVersion() >= 20);
1508 bool hasImageHandles = STI.hasImageHandles();
1509
1510 if (F->arg_empty() && !F->isVarArg()) {
1511 O << "()";
1512 return;
1513 }
1514
1515 O << "(\n";
1516
1517 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1518 Type *Ty = I->getType();
1519
1520 if (!first)
1521 O << ",\n";
1522
1523 first = false;
1524
1525 // Handle image/sampler parameters
1526 if (isKernelFunction(*F)) {
1527 if (isSampler(*I) || isImage(*I)) {
1528 if (isImage(*I)) {
1529 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1530 if (hasImageHandles)
1531 O << "\t.param .u64 .ptr .surfref ";
1532 else
1533 O << "\t.param .surfref ";
1534 O << TLI->getParamName(F, paramIndex);
1535 }
1536 else { // Default image is read_only
1537 if (hasImageHandles)
1538 O << "\t.param .u64 .ptr .texref ";
1539 else
1540 O << "\t.param .texref ";
1541 O << TLI->getParamName(F, paramIndex);
1542 }
1543 } else {
1544 if (hasImageHandles)
1545 O << "\t.param .u64 .ptr .samplerref ";
1546 else
1547 O << "\t.param .samplerref ";
1548 O << TLI->getParamName(F, paramIndex);
1549 }
1550 continue;
1551 }
1552 }
1553
1554 auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
1555 paramIndex](Type *Ty) -> Align {
1556 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1557 MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
1558 return std::max(TypeAlign, ParamAlign.valueOrOne());
1559 };
1560
1561 if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1562 if (ShouldPassAsArray(Ty)) {
1563 // Just print .param .align <a> .b8 .param[size];
1564 // <a> = optimal alignment for the element type; always multiple of
1565 // PAL.getParamAlignment
1566 // size = typeallocsize of element type
1567 Align OptimalAlign = getOptimalAlignForParam(Ty);
1568
1569 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1570 O << TLI->getParamName(F, paramIndex);
1571 O << "[" << DL.getTypeAllocSize(Ty) << "]";
1572
1573 continue;
1574 }
1575 // Just a scalar
1576 auto *PTy = dyn_cast<PointerType>(Ty);
1577 unsigned PTySizeInBits = 0;
1578 if (PTy) {
1579 PTySizeInBits =
1580 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1581 assert(PTySizeInBits && "Invalid pointer size");
1582 }
1583
1584 if (isKernelFunc) {
1585 if (PTy) {
1586 // Special handling for pointer arguments to kernel
1587 O << "\t.param .u" << PTySizeInBits << " ";
1588
1589 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1590 NVPTX::CUDA) {
1591 int addrSpace = PTy->getAddressSpace();
1592 switch (addrSpace) {
1593 default:
1594 O << ".ptr ";
1595 break;
1597 O << ".ptr .const ";
1598 break;
1600 O << ".ptr .shared ";
1601 break;
1603 O << ".ptr .global ";
1604 break;
1605 }
1606 Align ParamAlign = I->getParamAlign().valueOrOne();
1607 O << ".align " << ParamAlign.value() << " ";
1608 }
1609 O << TLI->getParamName(F, paramIndex);
1610 continue;
1611 }
1612
1613 // non-pointer scalar to kernel func
1614 O << "\t.param .";
1615 // Special case: predicate operands become .u8 types
1616 if (Ty->isIntegerTy(1))
1617 O << "u8";
1618 else
1619 O << getPTXFundamentalTypeStr(Ty);
1620 O << " ";
1621 O << TLI->getParamName(F, paramIndex);
1622 continue;
1623 }
1624 // Non-kernel function, just print .param .b<size> for ABI
1625 // and .reg .b<size> for non-ABI
1626 unsigned sz = 0;
1627 if (isa<IntegerType>(Ty)) {
1628 sz = cast<IntegerType>(Ty)->getBitWidth();
1630 } else if (PTy) {
1631 assert(PTySizeInBits && "Invalid pointer size");
1632 sz = PTySizeInBits;
1633 } else
1634 sz = Ty->getPrimitiveSizeInBits();
1635 if (isABI)
1636 O << "\t.param .b" << sz << " ";
1637 else
1638 O << "\t.reg .b" << sz << " ";
1639 O << TLI->getParamName(F, paramIndex);
1640 continue;
1641 }
1642
1643 // param has byVal attribute.
1644 Type *ETy = PAL.getParamByValType(paramIndex);
1645 assert(ETy && "Param should have byval type");
1646
1647 if (isABI || isKernelFunc) {
1648 // Just print .param .align <a> .b8 .param[size];
1649 // <a> = optimal alignment for the element type; always multiple of
1650 // PAL.getParamAlignment
1651 // size = typeallocsize of element type
1652 Align OptimalAlign =
1653 isKernelFunc
1654 ? getOptimalAlignForParam(ETy)
1655 : TLI->getFunctionByValParamAlign(
1656 F, ETy, PAL.getParamAlignment(paramIndex).valueOrOne(), DL);
1657
1658 unsigned sz = DL.getTypeAllocSize(ETy);
1659 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1660 O << TLI->getParamName(F, paramIndex);
1661 O << "[" << sz << "]";
1662 continue;
1663 } else {
1664 // Split the ETy into constituent parts and
1665 // print .param .b<size> <name> for each part.
1666 // Further, if a part is vector, print the above for
1667 // each vector element.
1668 SmallVector<EVT, 16> vtparts;
1669 ComputeValueVTs(*TLI, DL, ETy, vtparts);
1670 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1671 unsigned elems = 1;
1672 EVT elemtype = vtparts[i];
1673 if (vtparts[i].isVector()) {
1674 elems = vtparts[i].getVectorNumElements();
1675 elemtype = vtparts[i].getVectorElementType();
1676 }
1677
1678 for (unsigned j = 0, je = elems; j != je; ++j) {
1679 unsigned sz = elemtype.getSizeInBits();
1680 if (elemtype.isInteger())
1682 O << "\t.reg .b" << sz << " ";
1683 O << TLI->getParamName(F, paramIndex);
1684 if (j < je - 1)
1685 O << ",\n";
1686 ++paramIndex;
1687 }
1688 if (i < e - 1)
1689 O << ",\n";
1690 }
1691 --paramIndex;
1692 continue;
1693 }
1694 }
1695
1696 if (F->isVarArg()) {
1697 if (!first)
1698 O << ",\n";
1699 O << "\t.param .align " << STI.getMaxRequiredAlignment();
1700 O << " .b8 ";
1701 O << TLI->getParamName(F, /* vararg */ -1) << "[]";
1702 }
1703
1704 O << "\n)";
1705}
1706
1707void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1708 const MachineFunction &MF) {
1709 SmallString<128> Str;
1711
1712 // Map the global virtual register number to a register class specific
1713 // virtual register number starting from 1 with that class.
1715 //unsigned numRegClasses = TRI->getNumRegClasses();
1716
1717 // Emit the Fake Stack Object
1718 const MachineFrameInfo &MFI = MF.getFrameInfo();
1719 int NumBytes = (int) MFI.getStackSize();
1720 if (NumBytes) {
1721 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1722 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1723 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1724 O << "\t.reg .b64 \t%SP;\n";
1725 O << "\t.reg .b64 \t%SPL;\n";
1726 } else {
1727 O << "\t.reg .b32 \t%SP;\n";
1728 O << "\t.reg .b32 \t%SPL;\n";
1729 }
1730 }
1731
1732 // Go through all virtual registers to establish the mapping between the
1733 // global virtual
1734 // register number and the per class virtual register number.
1735 // We use the per class virtual register number in the ptx output.
1736 unsigned int numVRs = MRI->getNumVirtRegs();
1737 for (unsigned i = 0; i < numVRs; i++) {
1739 const TargetRegisterClass *RC = MRI->getRegClass(vr);
1740 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1741 int n = regmap.size();
1742 regmap.insert(std::make_pair(vr, n + 1));
1743 }
1744
1745 // Emit register declarations
1746 // @TODO: Extract out the real register usage
1747 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1748 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1749 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1750 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1751 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1752 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1753 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1754
1755 // Emit declaration of the virtual registers or 'physical' registers for
1756 // each register class
1757 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1758 const TargetRegisterClass *RC = TRI->getRegClass(i);
1759 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1760 std::string rcname = getNVPTXRegClassName(RC);
1761 std::string rcStr = getNVPTXRegClassStr(RC);
1762 int n = regmap.size();
1763
1764 // Only declare those registers that may be used.
1765 if (n) {
1766 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1767 << ">;\n";
1768 }
1769 }
1770
1771 OutStreamer->emitRawText(O.str());
1772}
1773
1774void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1775 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1776 bool ignored;
1777 unsigned int numHex;
1778 const char *lead;
1779
1780 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1781 numHex = 8;
1782 lead = "0f";
1784 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1785 numHex = 16;
1786 lead = "0d";
1788 } else
1789 llvm_unreachable("unsupported fp type");
1790
1791 APInt API = APF.bitcastToAPInt();
1792 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1793}
1794
1795void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1796 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1797 O << CI->getValue();
1798 return;
1799 }
1800 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1801 printFPConstant(CFP, O);
1802 return;
1803 }
1804 if (isa<ConstantPointerNull>(CPV)) {
1805 O << "0";
1806 return;
1807 }
1808 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1809 bool IsNonGenericPointer = false;
1810 if (GVar->getType()->getAddressSpace() != 0) {
1811 IsNonGenericPointer = true;
1812 }
1813 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1814 O << "generic(";
1815 getSymbol(GVar)->print(O, MAI);
1816 O << ")";
1817 } else {
1818 getSymbol(GVar)->print(O, MAI);
1819 }
1820 return;
1821 }
1822 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1823 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1824 printMCExpr(*E, O);
1825 return;
1826 }
1827 llvm_unreachable("Not scalar type found in printScalarConstant()");
1828}
1829
1830void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1831 AggBuffer *AggBuffer) {
1832 const DataLayout &DL = getDataLayout();
1833 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1834 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1835 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1836 // only the space allocated by CPV.
1837 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1838 return;
1839 }
1840
1841 // Helper for filling AggBuffer with APInts.
1842 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1843 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1844 SmallVector<unsigned char, 16> Buf(NumBytes);
1845 for (unsigned I = 0; I < NumBytes; ++I) {
1846 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1847 }
1848 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1849 };
1850
1851 switch (CPV->getType()->getTypeID()) {
1852 case Type::IntegerTyID:
1853 if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1854 AddIntToBuffer(CI->getValue());
1855 break;
1856 }
1857 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1858 if (const auto *CI =
1859 dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1860 AddIntToBuffer(CI->getValue());
1861 break;
1862 }
1863 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1864 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1865 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1866 AggBuffer->addZeros(AllocSize);
1867 break;
1868 }
1869 }
1870 llvm_unreachable("unsupported integer const type");
1871 break;
1872
1873 case Type::HalfTyID:
1874 case Type::BFloatTyID:
1875 case Type::FloatTyID:
1876 case Type::DoubleTyID:
1877 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1878 break;
1879
1880 case Type::PointerTyID: {
1881 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1882 AggBuffer->addSymbol(GVar, GVar);
1883 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1884 const Value *v = Cexpr->stripPointerCasts();
1885 AggBuffer->addSymbol(v, Cexpr);
1886 }
1887 AggBuffer->addZeros(AllocSize);
1888 break;
1889 }
1890
1891 case Type::ArrayTyID:
1893 case Type::StructTyID: {
1894 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1895 bufferAggregateConstant(CPV, AggBuffer);
1896 if (Bytes > AllocSize)
1897 AggBuffer->addZeros(Bytes - AllocSize);
1898 } else if (isa<ConstantAggregateZero>(CPV))
1899 AggBuffer->addZeros(Bytes);
1900 else
1901 llvm_unreachable("Unexpected Constant type");
1902 break;
1903 }
1904
1905 default:
1906 llvm_unreachable("unsupported type");
1907 }
1908}
1909
1910void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1911 AggBuffer *aggBuffer) {
1912 const DataLayout &DL = getDataLayout();
1913 int Bytes;
1914
1915 // Integers of arbitrary width
1916 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1917 APInt Val = CI->getValue();
1918 for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1919 uint8_t Byte = Val.getLoBits(8).getZExtValue();
1920 aggBuffer->addBytes(&Byte, 1, 1);
1921 Val.lshrInPlace(8);
1922 }
1923 return;
1924 }
1925
1926 // Old constants
1927 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1928 if (CPV->getNumOperands())
1929 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1930 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1931 return;
1932 }
1933
1934 if (const ConstantDataSequential *CDS =
1935 dyn_cast<ConstantDataSequential>(CPV)) {
1936 if (CDS->getNumElements())
1937 for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1938 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1939 aggBuffer);
1940 return;
1941 }
1942
1943 if (isa<ConstantStruct>(CPV)) {
1944 if (CPV->getNumOperands()) {
1945 StructType *ST = cast<StructType>(CPV->getType());
1946 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1947 if (i == (e - 1))
1948 Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1949 DL.getTypeAllocSize(ST) -
1950 DL.getStructLayout(ST)->getElementOffset(i);
1951 else
1952 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1953 DL.getStructLayout(ST)->getElementOffset(i);
1954 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1955 }
1956 }
1957 return;
1958 }
1959 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1960}
1961
1962/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1963/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1964/// expressions that are representable in PTX and create
1965/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1966const MCExpr *
1967NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1968 MCContext &Ctx = OutContext;
1969
1970 if (CV->isNullValue() || isa<UndefValue>(CV))
1971 return MCConstantExpr::create(0, Ctx);
1972
1973 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1974 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1975
1976 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1977 const MCSymbolRefExpr *Expr =
1979 if (ProcessingGeneric) {
1980 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1981 } else {
1982 return Expr;
1983 }
1984 }
1985
1986 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1987 if (!CE) {
1988 llvm_unreachable("Unknown constant value to lower!");
1989 }
1990
1991 switch (CE->getOpcode()) {
1992 default:
1993 break; // Error
1994
1995 case Instruction::AddrSpaceCast: {
1996 // Strip the addrspacecast and pass along the operand
1997 PointerType *DstTy = cast<PointerType>(CE->getType());
1998 if (DstTy->getAddressSpace() == 0)
1999 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2000
2001 break; // Error
2002 }
2003
2004 case Instruction::GetElementPtr: {
2005 const DataLayout &DL = getDataLayout();
2006
2007 // Generate a symbolic expression for the byte address
2008 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2009 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2010
2011 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2012 ProcessingGeneric);
2013 if (!OffsetAI)
2014 return Base;
2015
2016 int64_t Offset = OffsetAI.getSExtValue();
2018 Ctx);
2019 }
2020
2021 case Instruction::Trunc:
2022 // We emit the value and depend on the assembler to truncate the generated
2023 // expression properly. This is important for differences between
2024 // blockaddress labels. Since the two labels are in the same function, it
2025 // is reasonable to treat their delta as a 32-bit value.
2026 [[fallthrough]];
2027 case Instruction::BitCast:
2028 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2029
2030 case Instruction::IntToPtr: {
2031 const DataLayout &DL = getDataLayout();
2032
2033 // Handle casts to pointers by changing them into casts to the appropriate
2034 // integer type. This promotes constant folding and simplifies this code.
2035 Constant *Op = CE->getOperand(0);
2036 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2037 /*IsSigned*/ false, DL);
2038 if (Op)
2039 return lowerConstantForGV(Op, ProcessingGeneric);
2040
2041 break; // Error
2042 }
2043
2044 case Instruction::PtrToInt: {
2045 const DataLayout &DL = getDataLayout();
2046
2047 // Support only foldable casts to/from pointers that can be eliminated by
2048 // changing the pointer to the appropriately sized integer type.
2049 Constant *Op = CE->getOperand(0);
2050 Type *Ty = CE->getType();
2051
2052 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2053
2054 // We can emit the pointer value into this slot if the slot is an
2055 // integer slot equal to the size of the pointer.
2056 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2057 return OpExpr;
2058
2059 // Otherwise the pointer is smaller than the resultant integer, mask off
2060 // the high bits so we are sure to get a proper truncation if the input is
2061 // a constant expr.
2062 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2063 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2064 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2065 }
2066
2067 // The MC library also has a right-shift operator, but it isn't consistently
2068 // signed or unsigned between different targets.
2069 case Instruction::Add: {
2070 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2071 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2072 switch (CE->getOpcode()) {
2073 default: llvm_unreachable("Unknown binary operator constant cast expr");
2074 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2075 }
2076 }
2077 }
2078
2079 // If the code isn't optimized, there may be outstanding folding
2080 // opportunities. Attempt to fold the expression using DataLayout as a
2081 // last resort before giving up.
2083 if (C != CE)
2084 return lowerConstantForGV(C, ProcessingGeneric);
2085
2086 // Otherwise report the problem to the user.
2087 std::string S;
2089 OS << "Unsupported expression in static initializer: ";
2090 CE->printAsOperand(OS, /*PrintType=*/false,
2091 !MF ? nullptr : MF->getFunction().getParent());
2092 report_fatal_error(Twine(OS.str()));
2093}
2094
2095// Copy of MCExpr::print customized for NVPTX
2096void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2097 switch (Expr.getKind()) {
2098 case MCExpr::Target:
2099 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2100 case MCExpr::Constant:
2101 OS << cast<MCConstantExpr>(Expr).getValue();
2102 return;
2103
2104 case MCExpr::SymbolRef: {
2105 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2106 const MCSymbol &Sym = SRE.getSymbol();
2107 Sym.print(OS, MAI);
2108 return;
2109 }
2110
2111 case MCExpr::Unary: {
2112 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2113 switch (UE.getOpcode()) {
2114 case MCUnaryExpr::LNot: OS << '!'; break;
2115 case MCUnaryExpr::Minus: OS << '-'; break;
2116 case MCUnaryExpr::Not: OS << '~'; break;
2117 case MCUnaryExpr::Plus: OS << '+'; break;
2118 }
2119 printMCExpr(*UE.getSubExpr(), OS);
2120 return;
2121 }
2122
2123 case MCExpr::Binary: {
2124 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2125
2126 // Only print parens around the LHS if it is non-trivial.
2127 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2128 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2129 printMCExpr(*BE.getLHS(), OS);
2130 } else {
2131 OS << '(';
2132 printMCExpr(*BE.getLHS(), OS);
2133 OS<< ')';
2134 }
2135
2136 switch (BE.getOpcode()) {
2137 case MCBinaryExpr::Add:
2138 // Print "X-42" instead of "X+-42".
2139 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2140 if (RHSC->getValue() < 0) {
2141 OS << RHSC->getValue();
2142 return;
2143 }
2144 }
2145
2146 OS << '+';
2147 break;
2148 default: llvm_unreachable("Unhandled binary operator");
2149 }
2150
2151 // Only print parens around the LHS if it is non-trivial.
2152 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2153 printMCExpr(*BE.getRHS(), OS);
2154 } else {
2155 OS << '(';
2156 printMCExpr(*BE.getRHS(), OS);
2157 OS << ')';
2158 }
2159 return;
2160 }
2161 }
2162
2163 llvm_unreachable("Invalid expression kind!");
2164}
2165
2166/// PrintAsmOperand - Print out an operand for an inline asm expression.
2167///
2168bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2169 const char *ExtraCode, raw_ostream &O) {
2170 if (ExtraCode && ExtraCode[0]) {
2171 if (ExtraCode[1] != 0)
2172 return true; // Unknown modifier.
2173
2174 switch (ExtraCode[0]) {
2175 default:
2176 // See if this is a generic print operand
2177 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2178 case 'r':
2179 break;
2180 }
2181 }
2182
2183 printOperand(MI, OpNo, O);
2184
2185 return false;
2186}
2187
2188bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2189 unsigned OpNo,
2190 const char *ExtraCode,
2191 raw_ostream &O) {
2192 if (ExtraCode && ExtraCode[0])
2193 return true; // Unknown modifier
2194
2195 O << '[';
2196 printMemOperand(MI, OpNo, O);
2197 O << ']';
2198
2199 return false;
2200}
2201
2202void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
2203 raw_ostream &O) {
2204 const MachineOperand &MO = MI->getOperand(OpNum);
2205 switch (MO.getType()) {
2207 if (MO.getReg().isPhysical()) {
2208 if (MO.getReg() == NVPTX::VRDepot)
2210 else
2212 } else {
2213 emitVirtualRegister(MO.getReg(), O);
2214 }
2215 break;
2216
2218 O << MO.getImm();
2219 break;
2220
2222 printFPConstant(MO.getFPImm(), O);
2223 break;
2224
2226 PrintSymbolOperand(MO, O);
2227 break;
2228
2230 MO.getMBB()->getSymbol()->print(O, MAI);
2231 break;
2232
2233 default:
2234 llvm_unreachable("Operand type not supported.");
2235 }
2236}
2237
2238void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
2239 raw_ostream &O, const char *Modifier) {
2240 printOperand(MI, OpNum, O);
2241
2242 if (Modifier && strcmp(Modifier, "add") == 0) {
2243 O << ", ";
2244 printOperand(MI, OpNum + 1, O);
2245 } else {
2246 if (MI->getOperand(OpNum + 1).isImm() &&
2247 MI->getOperand(OpNum + 1).getImm() == 0)
2248 return; // don't print ',0' or '+0'
2249 O << "+";
2250 printOperand(MI, OpNum + 1, O);
2251 }
2252}
2253
2254// Force static initialization.
2258}
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:135
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Looks at all the uses of the given value Returns the Liveness deduced from the uses of this value Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses If the result is MaybeLiveUses might be modified but its content should be ignored(since it might not be complete). DeadArgumentEliminationPass
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
Symbol * Sym
Definition: ELF_riscv.cpp:479
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
static bool isEmptyXXStructor(GlobalVariable *GV)
#define DEPOTNAME
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
static bool usedInGlobalVarDef(const Constant *C)
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
static cl::opt< bool > LowerCtorDtor("nvptx-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(false), cl::Hidden)
static bool ShouldPassAsArray(Type *Ty)
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const char * name
Definition: SMEABIPass.cpp:49
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5196
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1485
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:698
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:418
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:87
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:90
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:102
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:449
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:394
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:121
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:105
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:94
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCSymbol * GetExternalSymbolSymbol(Twine Sym) const
Return the MCSymbol for the specified ExternalSymbol.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:395
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:99
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:402
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:422
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:413
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:214
ConstantArray - Constant Array Declarations.
Definition: Constants.h:422
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:582
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1016
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:267
const APFloat & getValueAPF() const
Definition: Constants.h:310
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:153
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:144
This is an important base class in LLVM.
Definition: Constant.h:41
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:556
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:118
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:110
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:514
bool hasExternalLinkage() const
Definition: GlobalValue.h:510
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:274
bool hasLocalLinkage() const
Definition: GlobalValue.h:527
bool hasPrivateLinkage() const
Definition: GlobalValue.h:526
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:294
bool hasWeakLinkage() const
Definition: GlobalValue.h:521
bool hasCommonLinkage() const
Definition: GlobalValue.h:531
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:511
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
Definition: MCAsmInfo.h:553
Binary assembler expressions.
Definition: MCExpr.h:490
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:637
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:640
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:539
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:534
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:634
@ Add
Addition.
Definition: MCExpr.h:493
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
Context object for machine code objects.
Definition: MCContext.h:76
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:200
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
@ Unary
Unary expressions.
Definition: MCExpr.h:41
@ Constant
Constant expressions.
Definition: MCExpr.h:39
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
@ Target
Target specific expression.
Definition: MCExpr.h:42
@ Binary
Binary expressions.
Definition: MCExpr.h:38
ExprKind getKind() const
Definition: MCExpr.h:81
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
const MCSymbol & getSymbol() const
Definition: MCExpr.h:408
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:395
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:40
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:58
Unary assembler expressions.
Definition: MCExpr.h:434
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:477
@ Minus
Unary minus.
Definition: MCExpr.h:438
@ Plus
Unary plus.
Definition: MCExpr.h:440
@ Not
Bitwise negation.
Definition: MCExpr.h:439
@ LNot
Logical negation.
Definition: MCExpr.h:437
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:480
Metadata node.
Definition: Metadata.h:1067
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
bool hasDebugInfo() const
Returns true if valid debug info is present.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:59
static const char * getRegisterName(MCRegister Reg)
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
bool hasImageHandles() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
UniqueStringSaver & getStrPool() const
Implements NVPTX-specific streamer.
void closeLastSection()
Close last section.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:679
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Class to represent struct types.
Definition: DerivedTypes.h:216
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
unsigned getPointerSizeInBits(unsigned AS) const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:146
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition: Type.h:75
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ VoidTyID
type with no size
Definition: Type.h:63
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ StructTyID
Structures.
Definition: Type.h:74
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition: Type.h:57
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ PointerTyID
Pointers.
Definition: Type.h:73
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:295
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:243
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
StringRef save(const char *S)
Definition: StringSaver.h:52
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool erase(const ValueT &V)
Definition: DenseSet.h:101
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:690
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ NVCL
Definition: NVPTX.h:79
@ CUDA
Definition: NVPTX.h:80
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
uint64_t read64le(const void *P)
Definition: Endian.h:412
uint32_t read32le(const void *P)
Definition: Endian.h:409
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1689
std::string getSamplerName(const Value &val)
AddressSpace
Definition: NVPTXBaseInfo.h:21
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
bool getAlign(const Function &F, unsigned index, unsigned &align)
bool getMinCTASm(const Function &F, unsigned &x)
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isImage(const Value &val)
bool getMaxNTIDz(const Function &F, unsigned &z)
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
bool isManaged(const Value &val)
unsigned promoteScalarArgumentSize(unsigned size)
bool isSurface(const Value &val)
void clearAnnotationCache(const Module *Mod)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
std::string getSurfaceName(const Value &val)
bool getReqNTIDx(const Function &F, unsigned &x)
bool getReqNTIDy(const Function &F, unsigned &y)
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:200
bool getMaxNReg(const Function &F, unsigned &x)
bool isTexture(const Value &val)
bool isImageWriteOnly(const Value &val)
bool isImageReadWrite(const Value &val)
void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
std::string getTextureName(const Value &val)
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
bool getReqNTIDz(const Function &F, unsigned &z)
bool getMaxNTIDx(const Function &F, unsigned &x)
bool getMaxNTIDy(const Function &F, unsigned &y)
bool isSampler(const Value &val)
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool getMaxClusterRank(const Function &F, unsigned &x)
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:927
Target & getTheNVPTXTarget32()
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:351
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...