LLVM 17.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/DenseSet.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/Twine.h"
49#include "llvm/IR/Attributes.h"
50#include "llvm/IR/BasicBlock.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/Constants.h"
53#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/DebugInfo.h"
56#include "llvm/IR/DebugLoc.h"
58#include "llvm/IR/Function.h"
59#include "llvm/IR/GlobalValue.h"
61#include "llvm/IR/Instruction.h"
62#include "llvm/IR/LLVMContext.h"
63#include "llvm/IR/Module.h"
64#include "llvm/IR/Operator.h"
65#include "llvm/IR/Type.h"
66#include "llvm/IR/User.h"
67#include "llvm/MC/MCExpr.h"
68#include "llvm/MC/MCInst.h"
69#include "llvm/MC/MCInstrDesc.h"
70#include "llvm/MC/MCStreamer.h"
71#include "llvm/MC/MCSymbol.h"
75#include "llvm/Support/Endian.h"
79#include "llvm/Support/Path.h"
85#include <cassert>
86#include <cstdint>
87#include <cstring>
88#include <new>
89#include <string>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94
95#define DEPOTNAME "__local_depot"
96
97/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
98/// depends.
99static void
102 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
103 Globals.insert(GV);
104 else {
105 if (const User *U = dyn_cast<User>(V)) {
106 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
107 DiscoverDependentGlobals(U->getOperand(i), Globals);
108 }
109 }
110 }
111}
112
113/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
114/// instances to be emitted, but only after any dependents have been added
115/// first.
116static void
121 // Have we already visited this one?
122 if (Visited.count(GV))
123 return;
124
125 // Do we have a circular dependency?
126 if (!Visiting.insert(GV).second)
127 report_fatal_error("Circular dependency found in global variable set");
128
129 // Make sure we visit all dependents first
131 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
132 DiscoverDependentGlobals(GV->getOperand(i), Others);
133
134 for (const GlobalVariable *GV : Others)
135 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
136
137 // Now we can visit ourself
138 Order.push_back(GV);
139 Visited.insert(GV);
140 Visiting.erase(GV);
141}
142
143void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
144 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
145 getSubtargetInfo().getFeatureBits());
146
147 MCInst Inst;
148 lowerToMCInst(MI, Inst);
150}
151
152// Handle symbol backtracking for targets that do not support image handles
153bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
154 unsigned OpNo, MCOperand &MCOp) {
155 const MachineOperand &MO = MI->getOperand(OpNo);
156 const MCInstrDesc &MCID = MI->getDesc();
157
158 if (MCID.TSFlags & NVPTXII::IsTexFlag) {
159 // This is a texture fetch, so operand 4 is a texref and operand 5 is
160 // a samplerref
161 if (OpNo == 4 && MO.isImm()) {
162 lowerImageHandleSymbol(MO.getImm(), MCOp);
163 return true;
164 }
165 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
166 lowerImageHandleSymbol(MO.getImm(), MCOp);
167 return true;
168 }
169
170 return false;
171 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
172 unsigned VecSize =
173 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
174
175 // For a surface load of vector size N, the Nth operand will be the surfref
176 if (OpNo == VecSize && MO.isImm()) {
177 lowerImageHandleSymbol(MO.getImm(), MCOp);
178 return true;
179 }
180
181 return false;
182 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
183 // This is a surface store, so operand 0 is a surfref
184 if (OpNo == 0 && MO.isImm()) {
185 lowerImageHandleSymbol(MO.getImm(), MCOp);
186 return true;
187 }
188
189 return false;
190 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
191 // This is a query, so operand 1 is a surfref/texref
192 if (OpNo == 1 && MO.isImm()) {
193 lowerImageHandleSymbol(MO.getImm(), MCOp);
194 return true;
195 }
196
197 return false;
198 }
199
200 return false;
201}
202
203void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
204 // Ewwww
206 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
208 const char *Sym = MFI->getImageHandleSymbol(Index);
209 StringRef SymName = nvTM.getStrPool().save(Sym);
210 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
211}
212
213void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
214 OutMI.setOpcode(MI->getOpcode());
215 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
216 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
217 const MachineOperand &MO = MI->getOperand(0);
218 OutMI.addOperand(GetSymbolRef(
220 return;
221 }
222
223 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
224 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
225 const MachineOperand &MO = MI->getOperand(i);
226
227 MCOperand MCOp;
228 if (!STI.hasImageHandles()) {
229 if (lowerImageHandleOperand(MI, i, MCOp)) {
230 OutMI.addOperand(MCOp);
231 continue;
232 }
233 }
234
235 if (lowerOperand(MO, MCOp))
236 OutMI.addOperand(MCOp);
237 }
238}
239
240bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
241 MCOperand &MCOp) {
242 switch (MO.getType()) {
243 default: llvm_unreachable("unknown operand type");
245 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
246 break;
248 MCOp = MCOperand::createImm(MO.getImm());
249 break;
252 MO.getMBB()->getSymbol(), OutContext));
253 break;
255 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
256 break;
258 MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
259 break;
261 const ConstantFP *Cnt = MO.getFPImm();
262 const APFloat &Val = Cnt->getValueAPF();
263
264 switch (Cnt->getType()->getTypeID()) {
265 default: report_fatal_error("Unsupported FP type"); break;
266 case Type::HalfTyID:
269 break;
270 case Type::FloatTyID:
273 break;
274 case Type::DoubleTyID:
277 break;
278 }
279 break;
280 }
281 }
282 return true;
283}
284
285unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
287 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
288
289 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
290 unsigned RegNum = RegMap[Reg];
291
292 // Encode the register class in the upper 4 bits
293 // Must be kept in sync with NVPTXInstPrinter::printRegName
294 unsigned Ret = 0;
295 if (RC == &NVPTX::Int1RegsRegClass) {
296 Ret = (1 << 28);
297 } else if (RC == &NVPTX::Int16RegsRegClass) {
298 Ret = (2 << 28);
299 } else if (RC == &NVPTX::Int32RegsRegClass) {
300 Ret = (3 << 28);
301 } else if (RC == &NVPTX::Int64RegsRegClass) {
302 Ret = (4 << 28);
303 } else if (RC == &NVPTX::Float32RegsRegClass) {
304 Ret = (5 << 28);
305 } else if (RC == &NVPTX::Float64RegsRegClass) {
306 Ret = (6 << 28);
307 } else if (RC == &NVPTX::Float16RegsRegClass) {
308 Ret = (7 << 28);
309 } else if (RC == &NVPTX::Float16x2RegsRegClass) {
310 Ret = (8 << 28);
311 } else {
312 report_fatal_error("Bad register class");
313 }
314
315 // Insert the vreg number
316 Ret |= (RegNum & 0x0FFFFFFF);
317 return Ret;
318 } else {
319 // Some special-use registers are actually physical registers.
320 // Encode this as the register class ID of 0 and the real register ID.
321 return Reg & 0x0FFFFFFF;
322 }
323}
324
325MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
326 const MCExpr *Expr;
328 OutContext);
329 return MCOperand::createExpr(Expr);
330}
331
332void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
333 const DataLayout &DL = getDataLayout();
335 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
336
337 Type *Ty = F->getReturnType();
338
339 bool isABI = (STI.getSmVersion() >= 20);
340
341 if (Ty->getTypeID() == Type::VoidTyID)
342 return;
343
344 O << " (";
345
346 if (isABI) {
347 if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
348 unsigned size = 0;
349 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
350 size = ITy->getBitWidth();
351 } else {
352 assert(Ty->isFloatingPointTy() && "Floating point type expected here");
354 }
355 // PTX ABI requires all scalar return values to be at least 32
356 // bits in size. fp16 normally uses .b16 as its storage type in
357 // PTX, so its size must be adjusted here, too.
359
360 O << ".param .b" << size << " func_retval0";
361 } else if (isa<PointerType>(Ty)) {
362 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
363 << " func_retval0";
364 } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
365 unsigned totalsz = DL.getTypeAllocSize(Ty);
366 unsigned retAlignment = 0;
367 if (!getAlign(*F, 0, retAlignment))
368 retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value();
369 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
370 << "]";
371 } else
372 llvm_unreachable("Unknown return type");
373 } else {
374 SmallVector<EVT, 16> vtparts;
375 ComputeValueVTs(*TLI, DL, Ty, vtparts);
376 unsigned idx = 0;
377 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
378 unsigned elems = 1;
379 EVT elemtype = vtparts[i];
380 if (vtparts[i].isVector()) {
381 elems = vtparts[i].getVectorNumElements();
382 elemtype = vtparts[i].getVectorElementType();
383 }
384
385 for (unsigned j = 0, je = elems; j != je; ++j) {
386 unsigned sz = elemtype.getSizeInBits();
387 if (elemtype.isInteger())
389 O << ".reg .b" << sz << " func_retval" << idx;
390 if (j < je - 1)
391 O << ", ";
392 ++idx;
393 }
394 if (i < e - 1)
395 O << ", ";
396 }
397 }
398 O << ") ";
399}
400
401void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
402 raw_ostream &O) {
403 const Function &F = MF.getFunction();
404 printReturnValStr(&F, O);
405}
406
407// Return true if MBB is the header of a loop marked with
408// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
409bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
410 const MachineBasicBlock &MBB) const {
411 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
412 // We insert .pragma "nounroll" only to the loop header.
413 if (!LI.isLoopHeader(&MBB))
414 return false;
415
416 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
417 // we iterate through each back edge of the loop with header MBB, and check
418 // whether its metadata contains llvm.loop.unroll.disable.
419 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
420 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
421 // Edges from other loops to MBB are not back edges.
422 continue;
423 }
424 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
425 if (MDNode *LoopID =
426 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
427 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
428 return true;
429 if (MDNode *UnrollCountMD =
430 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
431 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
432 ->isOne())
433 return true;
434 }
435 }
436 }
437 }
438 return false;
439}
440
441void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
443 if (isLoopHeaderOfNoUnroll(MBB))
444 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
445}
446
447void NVPTXAsmPrinter::emitFunctionEntryLabel() {
450
451 if (!GlobalsEmitted) {
452 emitGlobals(*MF->getFunction().getParent());
453 GlobalsEmitted = true;
454 }
455
456 // Set up
457 MRI = &MF->getRegInfo();
458 F = &MF->getFunction();
459 emitLinkageDirective(F, O);
460 if (isKernelFunction(*F))
461 O << ".entry ";
462 else {
463 O << ".func ";
464 printReturnValStr(*MF, O);
465 }
466
468
469 emitFunctionParamList(F, O);
470
471 if (isKernelFunction(*F))
472 emitKernelFunctionDirectives(*F, O);
473
475 O << ".noreturn";
476
477 OutStreamer->emitRawText(O.str());
478
479 VRegMapping.clear();
480 // Emit open brace for function body.
481 OutStreamer->emitRawText(StringRef("{\n"));
482 setAndEmitFunctionVirtualRegisters(*MF);
483 // Emit initial .loc debug directive for correct relocation symbol data.
484 if (MMI && MMI->hasDebugInfo())
486}
487
489 bool Result = AsmPrinter::runOnMachineFunction(F);
490 // Emit closing brace for the body of function F.
491 // The closing brace must be emitted here because we need to emit additional
492 // debug labels/data after the last basic block.
493 // We need to emit the closing brace here because we don't have a function that
494 // finishes emission of the function body.
495 OutStreamer->emitRawText(StringRef("}\n"));
496 return Result;
497}
498
499void NVPTXAsmPrinter::emitFunctionBodyStart() {
501 raw_svector_ostream O(Str);
502 emitDemotedVars(&MF->getFunction(), O);
503 OutStreamer->emitRawText(O.str());
504}
505
506void NVPTXAsmPrinter::emitFunctionBodyEnd() {
507 VRegMapping.clear();
508}
509
513 return OutContext.getOrCreateSymbol(Str);
514}
515
516void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
517 Register RegNo = MI->getOperand(0).getReg();
518 if (RegNo.isVirtual()) {
519 OutStreamer->AddComment(Twine("implicit-def: ") +
521 } else {
522 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
523 OutStreamer->AddComment(Twine("implicit-def: ") +
524 STI.getRegisterInfo()->getName(RegNo));
525 }
526 OutStreamer->addBlankLine();
527}
528
529void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
530 raw_ostream &O) const {
531 // If the NVVM IR has some of reqntid* specified, then output
532 // the reqntid directive, and set the unspecified ones to 1.
533 // If none of reqntid* is specified, don't output reqntid directive.
534 unsigned reqntidx, reqntidy, reqntidz;
535 bool specified = false;
536 if (!getReqNTIDx(F, reqntidx))
537 reqntidx = 1;
538 else
539 specified = true;
540 if (!getReqNTIDy(F, reqntidy))
541 reqntidy = 1;
542 else
543 specified = true;
544 if (!getReqNTIDz(F, reqntidz))
545 reqntidz = 1;
546 else
547 specified = true;
548
549 if (specified)
550 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
551 << "\n";
552
553 // If the NVVM IR has some of maxntid* specified, then output
554 // the maxntid directive, and set the unspecified ones to 1.
555 // If none of maxntid* is specified, don't output maxntid directive.
556 unsigned maxntidx, maxntidy, maxntidz;
557 specified = false;
558 if (!getMaxNTIDx(F, maxntidx))
559 maxntidx = 1;
560 else
561 specified = true;
562 if (!getMaxNTIDy(F, maxntidy))
563 maxntidy = 1;
564 else
565 specified = true;
566 if (!getMaxNTIDz(F, maxntidz))
567 maxntidz = 1;
568 else
569 specified = true;
570
571 if (specified)
572 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
573 << "\n";
574
575 unsigned mincta;
576 if (getMinCTASm(F, mincta))
577 O << ".minnctapersm " << mincta << "\n";
578
579 unsigned maxnreg;
580 if (getMaxNReg(F, maxnreg))
581 O << ".maxnreg " << maxnreg << "\n";
582}
583
584std::string
586 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
587
588 std::string Name;
589 raw_string_ostream NameStr(Name);
590
591 VRegRCMap::const_iterator I = VRegMapping.find(RC);
592 assert(I != VRegMapping.end() && "Bad register class");
593 const DenseMap<unsigned, unsigned> &RegMap = I->second;
594
595 VRegMap::const_iterator VI = RegMap.find(Reg);
596 assert(VI != RegMap.end() && "Bad virtual register");
597 unsigned MappedVR = VI->second;
598
599 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
600
601 NameStr.flush();
602 return Name;
603}
604
605void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
606 raw_ostream &O) {
607 O << getVirtualRegisterName(vr);
608}
609
610void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
611 emitLinkageDirective(F, O);
612 if (isKernelFunction(*F))
613 O << ".entry ";
614 else
615 O << ".func ";
616 printReturnValStr(F, O);
617 getSymbol(F)->print(O, MAI);
618 O << "\n";
619 emitFunctionParamList(F, O);
621 O << ".noreturn";
622 O << ";\n";
623}
624
625static bool usedInGlobalVarDef(const Constant *C) {
626 if (!C)
627 return false;
628
629 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
630 return GV->getName() != "llvm.used";
631 }
632
633 for (const User *U : C->users())
634 if (const Constant *C = dyn_cast<Constant>(U))
636 return true;
637
638 return false;
639}
640
641static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
642 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
643 if (othergv->getName() == "llvm.used")
644 return true;
645 }
646
647 if (const Instruction *instr = dyn_cast<Instruction>(U)) {
648 if (instr->getParent() && instr->getParent()->getParent()) {
649 const Function *curFunc = instr->getParent()->getParent();
650 if (oneFunc && (curFunc != oneFunc))
651 return false;
652 oneFunc = curFunc;
653 return true;
654 } else
655 return false;
656 }
657
658 for (const User *UU : U->users())
659 if (!usedInOneFunc(UU, oneFunc))
660 return false;
661
662 return true;
663}
664
665/* Find out if a global variable can be demoted to local scope.
666 * Currently, this is valid for CUDA shared variables, which have local
667 * scope and global lifetime. So the conditions to check are :
668 * 1. Is the global variable in shared address space?
669 * 2. Does it have internal linkage?
670 * 3. Is the global variable referenced only in one function?
671 */
672static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
673 if (!gv->hasInternalLinkage())
674 return false;
675 PointerType *Pty = gv->getType();
676 if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
677 return false;
678
679 const Function *oneFunc = nullptr;
680
681 bool flag = usedInOneFunc(gv, oneFunc);
682 if (!flag)
683 return false;
684 if (!oneFunc)
685 return false;
686 f = oneFunc;
687 return true;
688}
689
690static bool useFuncSeen(const Constant *C,
692 for (const User *U : C->users()) {
693 if (const Constant *cu = dyn_cast<Constant>(U)) {
694 if (useFuncSeen(cu, seenMap))
695 return true;
696 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
697 const BasicBlock *bb = I->getParent();
698 if (!bb)
699 continue;
700 const Function *caller = bb->getParent();
701 if (!caller)
702 continue;
703 if (seenMap.contains(caller))
704 return true;
705 }
706 }
707 return false;
708}
709
710void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
712 for (const Function &F : M) {
713 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
714 emitDeclaration(&F, O);
715 continue;
716 }
717
718 if (F.isDeclaration()) {
719 if (F.use_empty())
720 continue;
721 if (F.getIntrinsicID())
722 continue;
723 emitDeclaration(&F, O);
724 continue;
725 }
726 for (const User *U : F.users()) {
727 if (const Constant *C = dyn_cast<Constant>(U)) {
728 if (usedInGlobalVarDef(C)) {
729 // The use is in the initialization of a global variable
730 // that is a function pointer, so print a declaration
731 // for the original function
732 emitDeclaration(&F, O);
733 break;
734 }
735 // Emit a declaration of this function if the function that
736 // uses this constant expr has already been seen.
737 if (useFuncSeen(C, seenMap)) {
738 emitDeclaration(&F, O);
739 break;
740 }
741 }
742
743 if (!isa<Instruction>(U))
744 continue;
745 const Instruction *instr = cast<Instruction>(U);
746 const BasicBlock *bb = instr->getParent();
747 if (!bb)
748 continue;
749 const Function *caller = bb->getParent();
750 if (!caller)
751 continue;
752
753 // If a caller has already been seen, then the caller is
754 // appearing in the module before the callee. so print out
755 // a declaration for the callee.
756 if (seenMap.contains(caller)) {
757 emitDeclaration(&F, O);
758 break;
759 }
760 }
761 seenMap[&F] = true;
762 }
763}
764
766 if (!GV) return true;
767 const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
768 if (!InitList) return true; // Not an array; we don't know how to parse.
769 return InitList->getNumOperands() == 0;
770}
771
772void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
773 // Construct a default subtarget off of the TargetMachine defaults. The
774 // rest of NVPTX isn't friendly to change subtargets per function and
775 // so the default TargetMachine will have all of the options.
776 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
777 const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
778 SmallString<128> Str1;
779 raw_svector_ostream OS1(Str1);
780
781 // Emit header before any dwarf directives are emitted below.
782 emitHeader(M, OS1, *STI);
783 OutStreamer->emitRawText(OS1.str());
784}
785
787 if (M.alias_size()) {
788 report_fatal_error("Module has aliases, which NVPTX does not support.");
789 return true; // error
790 }
791 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
793 "Module has a nontrivial global ctor, which NVPTX does not support.");
794 return true; // error
795 }
796 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
798 "Module has a nontrivial global dtor, which NVPTX does not support.");
799 return true; // error
800 }
801
802 // We need to call the parent's one explicitly.
803 bool Result = AsmPrinter::doInitialization(M);
804
805 GlobalsEmitted = false;
806
807 return Result;
808}
809
810void NVPTXAsmPrinter::emitGlobals(const Module &M) {
811 SmallString<128> Str2;
812 raw_svector_ostream OS2(Str2);
813
814 emitDeclarations(M, OS2);
815
816 // As ptxas does not support forward references of globals, we need to first
817 // sort the list of module-level globals in def-use order. We visit each
818 // global variable in order, and ensure that we emit it *after* its dependent
819 // globals. We use a little extra memory maintaining both a set and a list to
820 // have fast searches while maintaining a strict ordering.
824
825 // Visit each global variable, in order
826 for (const GlobalVariable &I : M.globals())
827 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
828
829 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
830 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
831
832 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
833 const NVPTXSubtarget &STI =
834 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
835
836 // Print out module-level global variables in proper order
837 for (unsigned i = 0, e = Globals.size(); i != e; ++i)
838 printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
839
840 OS2 << '\n';
841
842 OutStreamer->emitRawText(OS2.str());
843}
844
845void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
846 const NVPTXSubtarget &STI) {
847 O << "//\n";
848 O << "// Generated by LLVM NVPTX Back-End\n";
849 O << "//\n";
850 O << "\n";
851
852 unsigned PTXVersion = STI.getPTXVersion();
853 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
854
855 O << ".target ";
856 O << STI.getTargetName();
857
858 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
859 if (NTM.getDrvInterface() == NVPTX::NVCL)
860 O << ", texmode_independent";
861
862 bool HasFullDebugInfo = false;
863 for (DICompileUnit *CU : M.debug_compile_units()) {
864 switch(CU->getEmissionKind()) {
867 break;
870 HasFullDebugInfo = true;
871 break;
872 }
873 if (HasFullDebugInfo)
874 break;
875 }
876 if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
877 O << ", debug";
878
879 O << "\n";
880
881 O << ".address_size ";
882 if (NTM.is64Bit())
883 O << "64";
884 else
885 O << "32";
886 O << "\n";
887
888 O << "\n";
889}
890
892 bool HasDebugInfo = MMI && MMI->hasDebugInfo();
893
894 // If we did not emit any functions, then the global declarations have not
895 // yet been emitted.
896 if (!GlobalsEmitted) {
897 emitGlobals(M);
898 GlobalsEmitted = true;
899 }
900
901 // call doFinalization
902 bool ret = AsmPrinter::doFinalization(M);
903
905
906 auto *TS =
907 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
908 // Close the last emitted section
909 if (HasDebugInfo) {
910 TS->closeLastSection();
911 // Emit empty .debug_loc section for better support of the empty files.
912 OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
913 }
914
915 // Output last DWARF .file directives, if any.
916 TS->outputDwarfFileDirectives();
917
918 return ret;
919}
920
921// This function emits appropriate linkage directives for
922// functions and global variables.
923//
924// extern function declaration -> .extern
925// extern function definition -> .visible
926// external global variable with init -> .visible
927// external without init -> .extern
928// appending -> not allowed, assert.
929// for any linkage other than
930// internal, private, linker_private,
931// linker_private_weak, linker_private_weak_def_auto,
932// we emit -> .weak.
933
934void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
935 raw_ostream &O) {
936 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
937 if (V->hasExternalLinkage()) {
938 if (isa<GlobalVariable>(V)) {
939 const GlobalVariable *GVar = cast<GlobalVariable>(V);
940 if (GVar) {
941 if (GVar->hasInitializer())
942 O << ".visible ";
943 else
944 O << ".extern ";
945 }
946 } else if (V->isDeclaration())
947 O << ".extern ";
948 else
949 O << ".visible ";
950 } else if (V->hasAppendingLinkage()) {
951 std::string msg;
952 msg.append("Error: ");
953 msg.append("Symbol ");
954 if (V->hasName())
955 msg.append(std::string(V->getName()));
956 msg.append("has unsupported appending linkage type");
957 llvm_unreachable(msg.c_str());
958 } else if (!V->hasInternalLinkage() &&
959 !V->hasPrivateLinkage()) {
960 O << ".weak ";
961 }
962 }
963}
964
965void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
966 raw_ostream &O, bool processDemoted,
967 const NVPTXSubtarget &STI) {
968 // Skip meta data
969 if (GVar->hasSection()) {
970 if (GVar->getSection() == "llvm.metadata")
971 return;
972 }
973
974 // Skip LLVM intrinsic global variables
975 if (GVar->getName().startswith("llvm.") ||
976 GVar->getName().startswith("nvvm."))
977 return;
978
979 const DataLayout &DL = getDataLayout();
980
981 // GlobalVariables are always constant pointers themselves.
982 PointerType *PTy = GVar->getType();
983 Type *ETy = GVar->getValueType();
984
985 if (GVar->hasExternalLinkage()) {
986 if (GVar->hasInitializer())
987 O << ".visible ";
988 else
989 O << ".extern ";
990 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
992 GVar->hasCommonLinkage()) {
993 O << ".weak ";
994 }
995
996 if (isTexture(*GVar)) {
997 O << ".global .texref " << getTextureName(*GVar) << ";\n";
998 return;
999 }
1000
1001 if (isSurface(*GVar)) {
1002 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1003 return;
1004 }
1005
1006 if (GVar->isDeclaration()) {
1007 // (extern) declarations, no definition or initializer
1008 // Currently the only known declaration is for an automatic __local
1009 // (.shared) promoted to global.
1010 emitPTXGlobalVariable(GVar, O, STI);
1011 O << ";\n";
1012 return;
1013 }
1014
1015 if (isSampler(*GVar)) {
1016 O << ".global .samplerref " << getSamplerName(*GVar);
1017
1018 const Constant *Initializer = nullptr;
1019 if (GVar->hasInitializer())
1020 Initializer = GVar->getInitializer();
1021 const ConstantInt *CI = nullptr;
1022 if (Initializer)
1023 CI = dyn_cast<ConstantInt>(Initializer);
1024 if (CI) {
1025 unsigned sample = CI->getZExtValue();
1026
1027 O << " = { ";
1028
1029 for (int i = 0,
1030 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1031 i < 3; i++) {
1032 O << "addr_mode_" << i << " = ";
1033 switch (addr) {
1034 case 0:
1035 O << "wrap";
1036 break;
1037 case 1:
1038 O << "clamp_to_border";
1039 break;
1040 case 2:
1041 O << "clamp_to_edge";
1042 break;
1043 case 3:
1044 O << "wrap";
1045 break;
1046 case 4:
1047 O << "mirror";
1048 break;
1049 }
1050 O << ", ";
1051 }
1052 O << "filter_mode = ";
1053 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1054 case 0:
1055 O << "nearest";
1056 break;
1057 case 1:
1058 O << "linear";
1059 break;
1060 case 2:
1061 llvm_unreachable("Anisotropic filtering is not supported");
1062 default:
1063 O << "nearest";
1064 break;
1065 }
1066 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1067 O << ", force_unnormalized_coords = 1";
1068 }
1069 O << " }";
1070 }
1071
1072 O << ";\n";
1073 return;
1074 }
1075
1076 if (GVar->hasPrivateLinkage()) {
1077 if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1078 return;
1079
1080 // FIXME - need better way (e.g. Metadata) to avoid generating this global
1081 if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1082 return;
1083 if (GVar->use_empty())
1084 return;
1085 }
1086
1087 const Function *demotedFunc = nullptr;
1088 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1089 O << "// " << GVar->getName() << " has been demoted\n";
1090 if (localDecls.find(demotedFunc) != localDecls.end())
1091 localDecls[demotedFunc].push_back(GVar);
1092 else {
1093 std::vector<const GlobalVariable *> temp;
1094 temp.push_back(GVar);
1095 localDecls[demotedFunc] = temp;
1096 }
1097 return;
1098 }
1099
1100 O << ".";
1101 emitPTXAddressSpace(PTy->getAddressSpace(), O);
1102
1103 if (isManaged(*GVar)) {
1104 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1106 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1107 }
1108 O << " .attribute(.managed)";
1109 }
1110
1111 if (MaybeAlign A = GVar->getAlign())
1112 O << " .align " << A->value();
1113 else
1114 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1115
1116 if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1117 (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1118 O << " .";
1119 // Special case: ABI requires that we use .u8 for predicates
1120 if (ETy->isIntegerTy(1))
1121 O << "u8";
1122 else
1123 O << getPTXFundamentalTypeStr(ETy, false);
1124 O << " ";
1125 getSymbol(GVar)->print(O, MAI);
1126
1127 // PTX allows variable initialization only for the constant and global state
1128 // spaces.
1129 if (GVar->hasInitializer()) {
1130 if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1131 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1132 const Constant *Initializer = GVar->getInitializer();
1133 // 'undef' is treated as there is no value specified.
1134 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1135 O << " = ";
1136 printScalarConstant(Initializer, O);
1137 }
1138 } else {
1139 // The frontend adds zero-initializer to device and constant variables
1140 // that don't have an initial value, and UndefValue to shared
1141 // variables, so skip warning for this case.
1142 if (!GVar->getInitializer()->isNullValue() &&
1143 !isa<UndefValue>(GVar->getInitializer())) {
1144 report_fatal_error("initial value of '" + GVar->getName() +
1145 "' is not allowed in addrspace(" +
1146 Twine(PTy->getAddressSpace()) + ")");
1147 }
1148 }
1149 }
1150 } else {
1151 uint64_t ElementSize = 0;
1152
1153 // Although PTX has direct support for struct type and array type and
1154 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1155 // targets that support these high level field accesses. Structs, arrays
1156 // and vectors are lowered into arrays of bytes.
1157 switch (ETy->getTypeID()) {
1158 case Type::IntegerTyID: // Integers larger than 64 bits
1159 case Type::StructTyID:
1160 case Type::ArrayTyID:
1162 ElementSize = DL.getTypeStoreSize(ETy);
1163 // Ptx allows variable initilization only for constant and
1164 // global state spaces.
1165 if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1166 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1167 GVar->hasInitializer()) {
1168 const Constant *Initializer = GVar->getInitializer();
1169 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1170 AggBuffer aggBuffer(ElementSize, *this);
1171 bufferAggregateConstant(Initializer, &aggBuffer);
1172 if (aggBuffer.numSymbols()) {
1173 unsigned int ptrSize = MAI->getCodePointerSize();
1174 if (ElementSize % ptrSize ||
1175 !aggBuffer.allSymbolsAligned(ptrSize)) {
1176 // Print in bytes and use the mask() operator for pointers.
1177 if (!STI.hasMaskOperator())
1179 "initialized packed aggregate with pointers '" +
1180 GVar->getName() +
1181 "' requires at least PTX ISA version 7.1");
1182 O << " .u8 ";
1183 getSymbol(GVar)->print(O, MAI);
1184 O << "[" << ElementSize << "] = {";
1185 aggBuffer.printBytes(O);
1186 O << "}";
1187 } else {
1188 O << " .u" << ptrSize * 8 << " ";
1189 getSymbol(GVar)->print(O, MAI);
1190 O << "[" << ElementSize / ptrSize << "] = {";
1191 aggBuffer.printWords(O);
1192 O << "}";
1193 }
1194 } else {
1195 O << " .b8 ";
1196 getSymbol(GVar)->print(O, MAI);
1197 O << "[" << ElementSize << "] = {";
1198 aggBuffer.printBytes(O);
1199 O << "}";
1200 }
1201 } else {
1202 O << " .b8 ";
1203 getSymbol(GVar)->print(O, MAI);
1204 if (ElementSize) {
1205 O << "[";
1206 O << ElementSize;
1207 O << "]";
1208 }
1209 }
1210 } else {
1211 O << " .b8 ";
1212 getSymbol(GVar)->print(O, MAI);
1213 if (ElementSize) {
1214 O << "[";
1215 O << ElementSize;
1216 O << "]";
1217 }
1218 }
1219 break;
1220 default:
1221 llvm_unreachable("type not supported yet");
1222 }
1223 }
1224 O << ";\n";
1225}
1226
1227void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1228 const Value *v = Symbols[nSym];
1229 const Value *v0 = SymbolsBeforeStripping[nSym];
1230 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1231 MCSymbol *Name = AP.getSymbol(GVar);
1232 PointerType *PTy = dyn_cast<PointerType>(v0->getType());
1233 // Is v0 a generic pointer?
1234 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1235 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1236 os << "generic(";
1237 Name->print(os, AP.MAI);
1238 os << ")";
1239 } else {
1240 Name->print(os, AP.MAI);
1241 }
1242 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1243 const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
1244 AP.printMCExpr(*Expr, os);
1245 } else
1246 llvm_unreachable("symbol type unknown");
1247}
1248
1249void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1250 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1251 symbolPosInBuffer.push_back(size);
1252 unsigned int nSym = 0;
1253 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1254 for (unsigned int pos = 0; pos < size;) {
1255 if (pos)
1256 os << ", ";
1257 if (pos != nextSymbolPos) {
1258 os << (unsigned int)buffer[pos];
1259 ++pos;
1260 continue;
1261 }
1262 // Generate a per-byte mask() operator for the symbol, which looks like:
1263 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1264 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1265 std::string symText;
1266 llvm::raw_string_ostream oss(symText);
1267 printSymbol(nSym, oss);
1268 for (unsigned i = 0; i < ptrSize; ++i) {
1269 if (i)
1270 os << ", ";
1271 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1272 os << "(" << symText << ")";
1273 }
1274 pos += ptrSize;
1275 nextSymbolPos = symbolPosInBuffer[++nSym];
1276 assert(nextSymbolPos >= pos);
1277 }
1278}
1279
1280void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1281 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1282 symbolPosInBuffer.push_back(size);
1283 unsigned int nSym = 0;
1284 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1285 assert(nextSymbolPos % ptrSize == 0);
1286 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1287 if (pos)
1288 os << ", ";
1289 if (pos == nextSymbolPos) {
1290 printSymbol(nSym, os);
1291 nextSymbolPos = symbolPosInBuffer[++nSym];
1292 assert(nextSymbolPos % ptrSize == 0);
1293 assert(nextSymbolPos >= pos + ptrSize);
1294 } else if (ptrSize == 4)
1295 os << support::endian::read32le(&buffer[pos]);
1296 else
1297 os << support::endian::read64le(&buffer[pos]);
1298 }
1299}
1300
1301void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1302 if (localDecls.find(f) == localDecls.end())
1303 return;
1304
1305 std::vector<const GlobalVariable *> &gvars = localDecls[f];
1306
1307 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1308 const NVPTXSubtarget &STI =
1309 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1310
1311 for (const GlobalVariable *GV : gvars) {
1312 O << "\t// demoted variable\n\t";
1313 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1314 }
1315}
1316
1317void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1318 raw_ostream &O) const {
1319 switch (AddressSpace) {
1321 O << "local";
1322 break;
1324 O << "global";
1325 break;
1327 O << "const";
1328 break;
1330 O << "shared";
1331 break;
1332 default:
1333 report_fatal_error("Bad address space found while emitting PTX: " +
1335 break;
1336 }
1337}
1338
1339std::string
1340NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1341 switch (Ty->getTypeID()) {
1342 case Type::IntegerTyID: {
1343 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1344 if (NumBits == 1)
1345 return "pred";
1346 else if (NumBits <= 64) {
1347 std::string name = "u";
1348 return name + utostr(NumBits);
1349 } else {
1350 llvm_unreachable("Integer too large");
1351 break;
1352 }
1353 break;
1354 }
1355 case Type::HalfTyID:
1356 // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1357 return "b16";
1358 case Type::FloatTyID:
1359 return "f32";
1360 case Type::DoubleTyID:
1361 return "f64";
1362 case Type::PointerTyID: {
1363 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1364 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1365
1366 if (PtrSize == 64)
1367 if (useB4PTR)
1368 return "b64";
1369 else
1370 return "u64";
1371 else if (useB4PTR)
1372 return "b32";
1373 else
1374 return "u32";
1375 }
1376 default:
1377 break;
1378 }
1379 llvm_unreachable("unexpected type");
1380}
1381
1382void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1383 raw_ostream &O,
1384 const NVPTXSubtarget &STI) {
1385 const DataLayout &DL = getDataLayout();
1386
1387 // GlobalVariables are always constant pointers themselves.
1388 Type *ETy = GVar->getValueType();
1389
1390 O << ".";
1391 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1392 if (isManaged(*GVar)) {
1393 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1395 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1396 }
1397 O << " .attribute(.managed)";
1398 }
1399 if (MaybeAlign A = GVar->getAlign())
1400 O << " .align " << A->value();
1401 else
1402 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1403
1404 // Special case for i128
1405 if (ETy->isIntegerTy(128)) {
1406 O << " .b8 ";
1407 getSymbol(GVar)->print(O, MAI);
1408 O << "[16]";
1409 return;
1410 }
1411
1412 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1413 O << " .";
1414 O << getPTXFundamentalTypeStr(ETy);
1415 O << " ";
1416 getSymbol(GVar)->print(O, MAI);
1417 return;
1418 }
1419
1420 int64_t ElementSize = 0;
1421
1422 // Although PTX has direct support for struct type and array type and LLVM IR
1423 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1424 // support these high level field accesses. Structs and arrays are lowered
1425 // into arrays of bytes.
1426 switch (ETy->getTypeID()) {
1427 case Type::StructTyID:
1428 case Type::ArrayTyID:
1430 ElementSize = DL.getTypeStoreSize(ETy);
1431 O << " .b8 ";
1432 getSymbol(GVar)->print(O, MAI);
1433 O << "[";
1434 if (ElementSize) {
1435 O << ElementSize;
1436 }
1437 O << "]";
1438 break;
1439 default:
1440 llvm_unreachable("type not supported yet");
1441 }
1442}
1443
1444void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1445 const DataLayout &DL = getDataLayout();
1446 const AttributeList &PAL = F->getAttributes();
1447 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1448 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1449
1451 unsigned paramIndex = 0;
1452 bool first = true;
1453 bool isKernelFunc = isKernelFunction(*F);
1454 bool isABI = (STI.getSmVersion() >= 20);
1455 bool hasImageHandles = STI.hasImageHandles();
1456
1457 if (F->arg_empty() && !F->isVarArg()) {
1458 O << "()\n";
1459 return;
1460 }
1461
1462 O << "(\n";
1463
1464 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1465 Type *Ty = I->getType();
1466
1467 if (!first)
1468 O << ",\n";
1469
1470 first = false;
1471
1472 // Handle image/sampler parameters
1473 if (isKernelFunction(*F)) {
1474 if (isSampler(*I) || isImage(*I)) {
1475 if (isImage(*I)) {
1476 std::string sname = std::string(I->getName());
1477 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1478 if (hasImageHandles)
1479 O << "\t.param .u64 .ptr .surfref ";
1480 else
1481 O << "\t.param .surfref ";
1482 O << TLI->getParamName(F, paramIndex);
1483 }
1484 else { // Default image is read_only
1485 if (hasImageHandles)
1486 O << "\t.param .u64 .ptr .texref ";
1487 else
1488 O << "\t.param .texref ";
1489 O << TLI->getParamName(F, paramIndex);
1490 }
1491 } else {
1492 if (hasImageHandles)
1493 O << "\t.param .u64 .ptr .samplerref ";
1494 else
1495 O << "\t.param .samplerref ";
1496 O << TLI->getParamName(F, paramIndex);
1497 }
1498 continue;
1499 }
1500 }
1501
1502 auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
1503 paramIndex](Type *Ty) -> Align {
1504 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1505 MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
1506 return std::max(TypeAlign, ParamAlign.valueOrOne());
1507 };
1508
1509 if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1510 if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1511 // Just print .param .align <a> .b8 .param[size];
1512 // <a> = optimal alignment for the element type; always multiple of
1513 // PAL.getParamAlignment
1514 // size = typeallocsize of element type
1515 Align OptimalAlign = getOptimalAlignForParam(Ty);
1516
1517 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1518 O << TLI->getParamName(F, paramIndex);
1519 O << "[" << DL.getTypeAllocSize(Ty) << "]";
1520
1521 continue;
1522 }
1523 // Just a scalar
1524 auto *PTy = dyn_cast<PointerType>(Ty);
1525 unsigned PTySizeInBits = 0;
1526 if (PTy) {
1527 PTySizeInBits =
1528 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1529 assert(PTySizeInBits && "Invalid pointer size");
1530 }
1531
1532 if (isKernelFunc) {
1533 if (PTy) {
1534 // Special handling for pointer arguments to kernel
1535 O << "\t.param .u" << PTySizeInBits << " ";
1536
1537 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1538 NVPTX::CUDA) {
1539 int addrSpace = PTy->getAddressSpace();
1540 switch (addrSpace) {
1541 default:
1542 O << ".ptr ";
1543 break;
1545 O << ".ptr .const ";
1546 break;
1548 O << ".ptr .shared ";
1549 break;
1551 O << ".ptr .global ";
1552 break;
1553 }
1554 Align ParamAlign = I->getParamAlign().valueOrOne();
1555 O << ".align " << ParamAlign.value() << " ";
1556 }
1557 O << TLI->getParamName(F, paramIndex);
1558 continue;
1559 }
1560
1561 // non-pointer scalar to kernel func
1562 O << "\t.param .";
1563 // Special case: predicate operands become .u8 types
1564 if (Ty->isIntegerTy(1))
1565 O << "u8";
1566 else
1567 O << getPTXFundamentalTypeStr(Ty);
1568 O << " ";
1569 O << TLI->getParamName(F, paramIndex);
1570 continue;
1571 }
1572 // Non-kernel function, just print .param .b<size> for ABI
1573 // and .reg .b<size> for non-ABI
1574 unsigned sz = 0;
1575 if (isa<IntegerType>(Ty)) {
1576 sz = cast<IntegerType>(Ty)->getBitWidth();
1578 } else if (PTy) {
1579 assert(PTySizeInBits && "Invalid pointer size");
1580 sz = PTySizeInBits;
1581 } else if (Ty->isHalfTy())
1582 // PTX ABI requires all scalar parameters to be at least 32
1583 // bits in size. fp16 normally uses .b16 as its storage type
1584 // in PTX, so its size must be adjusted here, too.
1585 sz = 32;
1586 else
1587 sz = Ty->getPrimitiveSizeInBits();
1588 if (isABI)
1589 O << "\t.param .b" << sz << " ";
1590 else
1591 O << "\t.reg .b" << sz << " ";
1592 O << TLI->getParamName(F, paramIndex);
1593 continue;
1594 }
1595
1596 // param has byVal attribute.
1597 Type *ETy = PAL.getParamByValType(paramIndex);
1598 assert(ETy && "Param should have byval type");
1599
1600 if (isABI || isKernelFunc) {
1601 // Just print .param .align <a> .b8 .param[size];
1602 // <a> = optimal alignment for the element type; always multiple of
1603 // PAL.getParamAlignment
1604 // size = typeallocsize of element type
1605 Align OptimalAlign =
1606 isKernelFunc
1607 ? getOptimalAlignForParam(ETy)
1608 : TLI->getFunctionByValParamAlign(
1609 F, ETy, PAL.getParamAlignment(paramIndex).valueOrOne(), DL);
1610
1611 unsigned sz = DL.getTypeAllocSize(ETy);
1612 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1613 O << TLI->getParamName(F, paramIndex);
1614 O << "[" << sz << "]";
1615 continue;
1616 } else {
1617 // Split the ETy into constituent parts and
1618 // print .param .b<size> <name> for each part.
1619 // Further, if a part is vector, print the above for
1620 // each vector element.
1621 SmallVector<EVT, 16> vtparts;
1622 ComputeValueVTs(*TLI, DL, ETy, vtparts);
1623 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1624 unsigned elems = 1;
1625 EVT elemtype = vtparts[i];
1626 if (vtparts[i].isVector()) {
1627 elems = vtparts[i].getVectorNumElements();
1628 elemtype = vtparts[i].getVectorElementType();
1629 }
1630
1631 for (unsigned j = 0, je = elems; j != je; ++j) {
1632 unsigned sz = elemtype.getSizeInBits();
1633 if (elemtype.isInteger())
1635 O << "\t.reg .b" << sz << " ";
1636 O << TLI->getParamName(F, paramIndex);
1637 if (j < je - 1)
1638 O << ",\n";
1639 ++paramIndex;
1640 }
1641 if (i < e - 1)
1642 O << ",\n";
1643 }
1644 --paramIndex;
1645 continue;
1646 }
1647 }
1648
1649 if (F->isVarArg()) {
1650 if (!first)
1651 O << ",\n";
1652 O << "\t.param .align " << STI.getMaxRequiredAlignment();
1653 O << " .b8 ";
1654 O << TLI->getParamName(F, /* vararg */ -1) << "[]";
1655 }
1656
1657 O << "\n)\n";
1658}
1659
1660void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1661 const MachineFunction &MF) {
1662 SmallString<128> Str;
1664
1665 // Map the global virtual register number to a register class specific
1666 // virtual register number starting from 1 with that class.
1668 //unsigned numRegClasses = TRI->getNumRegClasses();
1669
1670 // Emit the Fake Stack Object
1671 const MachineFrameInfo &MFI = MF.getFrameInfo();
1672 int NumBytes = (int) MFI.getStackSize();
1673 if (NumBytes) {
1674 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1675 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1676 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1677 O << "\t.reg .b64 \t%SP;\n";
1678 O << "\t.reg .b64 \t%SPL;\n";
1679 } else {
1680 O << "\t.reg .b32 \t%SP;\n";
1681 O << "\t.reg .b32 \t%SPL;\n";
1682 }
1683 }
1684
1685 // Go through all virtual registers to establish the mapping between the
1686 // global virtual
1687 // register number and the per class virtual register number.
1688 // We use the per class virtual register number in the ptx output.
1689 unsigned int numVRs = MRI->getNumVirtRegs();
1690 for (unsigned i = 0; i < numVRs; i++) {
1692 const TargetRegisterClass *RC = MRI->getRegClass(vr);
1693 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1694 int n = regmap.size();
1695 regmap.insert(std::make_pair(vr, n + 1));
1696 }
1697
1698 // Emit register declarations
1699 // @TODO: Extract out the real register usage
1700 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1701 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1702 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1703 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1704 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1705 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1706 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1707
1708 // Emit declaration of the virtual registers or 'physical' registers for
1709 // each register class
1710 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1711 const TargetRegisterClass *RC = TRI->getRegClass(i);
1712 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1713 std::string rcname = getNVPTXRegClassName(RC);
1714 std::string rcStr = getNVPTXRegClassStr(RC);
1715 int n = regmap.size();
1716
1717 // Only declare those registers that may be used.
1718 if (n) {
1719 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1720 << ">;\n";
1721 }
1722 }
1723
1724 OutStreamer->emitRawText(O.str());
1725}
1726
1727void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1728 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1729 bool ignored;
1730 unsigned int numHex;
1731 const char *lead;
1732
1733 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1734 numHex = 8;
1735 lead = "0f";
1737 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1738 numHex = 16;
1739 lead = "0d";
1741 } else
1742 llvm_unreachable("unsupported fp type");
1743
1744 APInt API = APF.bitcastToAPInt();
1745 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1746}
1747
1748void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1749 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1750 O << CI->getValue();
1751 return;
1752 }
1753 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1754 printFPConstant(CFP, O);
1755 return;
1756 }
1757 if (isa<ConstantPointerNull>(CPV)) {
1758 O << "0";
1759 return;
1760 }
1761 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1762 bool IsNonGenericPointer = false;
1763 if (GVar->getType()->getAddressSpace() != 0) {
1764 IsNonGenericPointer = true;
1765 }
1766 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1767 O << "generic(";
1768 getSymbol(GVar)->print(O, MAI);
1769 O << ")";
1770 } else {
1771 getSymbol(GVar)->print(O, MAI);
1772 }
1773 return;
1774 }
1775 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1776 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1777 printMCExpr(*E, O);
1778 return;
1779 }
1780 llvm_unreachable("Not scalar type found in printScalarConstant()");
1781}
1782
1783void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1784 AggBuffer *AggBuffer) {
1785 const DataLayout &DL = getDataLayout();
1786 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1787 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1788 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1789 // only the space allocated by CPV.
1790 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1791 return;
1792 }
1793
1794 // Helper for filling AggBuffer with APInts.
1795 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1796 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1797 SmallVector<unsigned char, 16> Buf(NumBytes);
1798 for (unsigned I = 0; I < NumBytes; ++I) {
1799 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1800 }
1801 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1802 };
1803
1804 switch (CPV->getType()->getTypeID()) {
1805 case Type::IntegerTyID:
1806 if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1807 AddIntToBuffer(CI->getValue());
1808 break;
1809 }
1810 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1811 if (const auto *CI =
1812 dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1813 AddIntToBuffer(CI->getValue());
1814 break;
1815 }
1816 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1817 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1818 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1819 AggBuffer->addZeros(AllocSize);
1820 break;
1821 }
1822 }
1823 llvm_unreachable("unsupported integer const type");
1824 break;
1825
1826 case Type::HalfTyID:
1827 case Type::BFloatTyID:
1828 case Type::FloatTyID:
1829 case Type::DoubleTyID:
1830 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1831 break;
1832
1833 case Type::PointerTyID: {
1834 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1835 AggBuffer->addSymbol(GVar, GVar);
1836 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1837 const Value *v = Cexpr->stripPointerCasts();
1838 AggBuffer->addSymbol(v, Cexpr);
1839 }
1840 AggBuffer->addZeros(AllocSize);
1841 break;
1842 }
1843
1844 case Type::ArrayTyID:
1846 case Type::StructTyID: {
1847 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1848 bufferAggregateConstant(CPV, AggBuffer);
1849 if (Bytes > AllocSize)
1850 AggBuffer->addZeros(Bytes - AllocSize);
1851 } else if (isa<ConstantAggregateZero>(CPV))
1852 AggBuffer->addZeros(Bytes);
1853 else
1854 llvm_unreachable("Unexpected Constant type");
1855 break;
1856 }
1857
1858 default:
1859 llvm_unreachable("unsupported type");
1860 }
1861}
1862
1863void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1864 AggBuffer *aggBuffer) {
1865 const DataLayout &DL = getDataLayout();
1866 int Bytes;
1867
1868 // Integers of arbitrary width
1869 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1870 APInt Val = CI->getValue();
1871 for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1872 uint8_t Byte = Val.getLoBits(8).getZExtValue();
1873 aggBuffer->addBytes(&Byte, 1, 1);
1874 Val.lshrInPlace(8);
1875 }
1876 return;
1877 }
1878
1879 // Old constants
1880 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1881 if (CPV->getNumOperands())
1882 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1883 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1884 return;
1885 }
1886
1887 if (const ConstantDataSequential *CDS =
1888 dyn_cast<ConstantDataSequential>(CPV)) {
1889 if (CDS->getNumElements())
1890 for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1891 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1892 aggBuffer);
1893 return;
1894 }
1895
1896 if (isa<ConstantStruct>(CPV)) {
1897 if (CPV->getNumOperands()) {
1898 StructType *ST = cast<StructType>(CPV->getType());
1899 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1900 if (i == (e - 1))
1901 Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1902 DL.getTypeAllocSize(ST) -
1903 DL.getStructLayout(ST)->getElementOffset(i);
1904 else
1905 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1906 DL.getStructLayout(ST)->getElementOffset(i);
1907 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1908 }
1909 }
1910 return;
1911 }
1912 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1913}
1914
1915/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1916/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1917/// expressions that are representable in PTX and create
1918/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1919const MCExpr *
1920NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1921 MCContext &Ctx = OutContext;
1922
1923 if (CV->isNullValue() || isa<UndefValue>(CV))
1924 return MCConstantExpr::create(0, Ctx);
1925
1926 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1927 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1928
1929 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1930 const MCSymbolRefExpr *Expr =
1932 if (ProcessingGeneric) {
1933 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1934 } else {
1935 return Expr;
1936 }
1937 }
1938
1939 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1940 if (!CE) {
1941 llvm_unreachable("Unknown constant value to lower!");
1942 }
1943
1944 switch (CE->getOpcode()) {
1945 default: {
1946 // If the code isn't optimized, there may be outstanding folding
1947 // opportunities. Attempt to fold the expression using DataLayout as a
1948 // last resort before giving up.
1950 if (C != CE)
1951 return lowerConstantForGV(C, ProcessingGeneric);
1952
1953 // Otherwise report the problem to the user.
1954 std::string S;
1956 OS << "Unsupported expression in static initializer: ";
1957 CE->printAsOperand(OS, /*PrintType=*/false,
1958 !MF ? nullptr : MF->getFunction().getParent());
1959 report_fatal_error(Twine(OS.str()));
1960 }
1961
1962 case Instruction::AddrSpaceCast: {
1963 // Strip the addrspacecast and pass along the operand
1964 PointerType *DstTy = cast<PointerType>(CE->getType());
1965 if (DstTy->getAddressSpace() == 0) {
1966 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1967 }
1968 std::string S;
1970 OS << "Unsupported expression in static initializer: ";
1971 CE->printAsOperand(OS, /*PrintType=*/ false,
1972 !MF ? nullptr : MF->getFunction().getParent());
1973 report_fatal_error(Twine(OS.str()));
1974 }
1975
1976 case Instruction::GetElementPtr: {
1977 const DataLayout &DL = getDataLayout();
1978
1979 // Generate a symbolic expression for the byte address
1980 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1981 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1982
1983 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1984 ProcessingGeneric);
1985 if (!OffsetAI)
1986 return Base;
1987
1988 int64_t Offset = OffsetAI.getSExtValue();
1990 Ctx);
1991 }
1992
1993 case Instruction::Trunc:
1994 // We emit the value and depend on the assembler to truncate the generated
1995 // expression properly. This is important for differences between
1996 // blockaddress labels. Since the two labels are in the same function, it
1997 // is reasonable to treat their delta as a 32-bit value.
1998 [[fallthrough]];
1999 case Instruction::BitCast:
2000 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2001
2002 case Instruction::IntToPtr: {
2003 const DataLayout &DL = getDataLayout();
2004
2005 // Handle casts to pointers by changing them into casts to the appropriate
2006 // integer type. This promotes constant folding and simplifies this code.
2007 Constant *Op = CE->getOperand(0);
2008 Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2009 false/*ZExt*/);
2010 return lowerConstantForGV(Op, ProcessingGeneric);
2011 }
2012
2013 case Instruction::PtrToInt: {
2014 const DataLayout &DL = getDataLayout();
2015
2016 // Support only foldable casts to/from pointers that can be eliminated by
2017 // changing the pointer to the appropriately sized integer type.
2018 Constant *Op = CE->getOperand(0);
2019 Type *Ty = CE->getType();
2020
2021 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2022
2023 // We can emit the pointer value into this slot if the slot is an
2024 // integer slot equal to the size of the pointer.
2025 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2026 return OpExpr;
2027
2028 // Otherwise the pointer is smaller than the resultant integer, mask off
2029 // the high bits so we are sure to get a proper truncation if the input is
2030 // a constant expr.
2031 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2032 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2033 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2034 }
2035
2036 // The MC library also has a right-shift operator, but it isn't consistently
2037 // signed or unsigned between different targets.
2038 case Instruction::Add: {
2039 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2040 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2041 switch (CE->getOpcode()) {
2042 default: llvm_unreachable("Unknown binary operator constant cast expr");
2043 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2044 }
2045 }
2046 }
2047}
2048
2049// Copy of MCExpr::print customized for NVPTX
2050void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2051 switch (Expr.getKind()) {
2052 case MCExpr::Target:
2053 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2054 case MCExpr::Constant:
2055 OS << cast<MCConstantExpr>(Expr).getValue();
2056 return;
2057
2058 case MCExpr::SymbolRef: {
2059 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2060 const MCSymbol &Sym = SRE.getSymbol();
2061 Sym.print(OS, MAI);
2062 return;
2063 }
2064
2065 case MCExpr::Unary: {
2066 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2067 switch (UE.getOpcode()) {
2068 case MCUnaryExpr::LNot: OS << '!'; break;
2069 case MCUnaryExpr::Minus: OS << '-'; break;
2070 case MCUnaryExpr::Not: OS << '~'; break;
2071 case MCUnaryExpr::Plus: OS << '+'; break;
2072 }
2073 printMCExpr(*UE.getSubExpr(), OS);
2074 return;
2075 }
2076
2077 case MCExpr::Binary: {
2078 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2079
2080 // Only print parens around the LHS if it is non-trivial.
2081 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2082 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2083 printMCExpr(*BE.getLHS(), OS);
2084 } else {
2085 OS << '(';
2086 printMCExpr(*BE.getLHS(), OS);
2087 OS<< ')';
2088 }
2089
2090 switch (BE.getOpcode()) {
2091 case MCBinaryExpr::Add:
2092 // Print "X-42" instead of "X+-42".
2093 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2094 if (RHSC->getValue() < 0) {
2095 OS << RHSC->getValue();
2096 return;
2097 }
2098 }
2099
2100 OS << '+';
2101 break;
2102 default: llvm_unreachable("Unhandled binary operator");
2103 }
2104
2105 // Only print parens around the LHS if it is non-trivial.
2106 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2107 printMCExpr(*BE.getRHS(), OS);
2108 } else {
2109 OS << '(';
2110 printMCExpr(*BE.getRHS(), OS);
2111 OS << ')';
2112 }
2113 return;
2114 }
2115 }
2116
2117 llvm_unreachable("Invalid expression kind!");
2118}
2119
2120/// PrintAsmOperand - Print out an operand for an inline asm expression.
2121///
2122bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2123 const char *ExtraCode, raw_ostream &O) {
2124 if (ExtraCode && ExtraCode[0]) {
2125 if (ExtraCode[1] != 0)
2126 return true; // Unknown modifier.
2127
2128 switch (ExtraCode[0]) {
2129 default:
2130 // See if this is a generic print operand
2131 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2132 case 'r':
2133 break;
2134 }
2135 }
2136
2137 printOperand(MI, OpNo, O);
2138
2139 return false;
2140}
2141
2142bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2143 unsigned OpNo,
2144 const char *ExtraCode,
2145 raw_ostream &O) {
2146 if (ExtraCode && ExtraCode[0])
2147 return true; // Unknown modifier
2148
2149 O << '[';
2150 printMemOperand(MI, OpNo, O);
2151 O << ']';
2152
2153 return false;
2154}
2155
2156void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2157 raw_ostream &O) {
2158 const MachineOperand &MO = MI->getOperand(opNum);
2159 switch (MO.getType()) {
2161 if (MO.getReg().isPhysical()) {
2162 if (MO.getReg() == NVPTX::VRDepot)
2164 else
2166 } else {
2167 emitVirtualRegister(MO.getReg(), O);
2168 }
2169 break;
2170
2172 O << MO.getImm();
2173 break;
2174
2176 printFPConstant(MO.getFPImm(), O);
2177 break;
2178
2180 PrintSymbolOperand(MO, O);
2181 break;
2182
2184 MO.getMBB()->getSymbol()->print(O, MAI);
2185 break;
2186
2187 default:
2188 llvm_unreachable("Operand type not supported.");
2189 }
2190}
2191
2192void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2193 raw_ostream &O, const char *Modifier) {
2194 printOperand(MI, opNum, O);
2195
2196 if (Modifier && strcmp(Modifier, "add") == 0) {
2197 O << ", ";
2198 printOperand(MI, opNum + 1, O);
2199 } else {
2200 if (MI->getOperand(opNum + 1).isImm() &&
2201 MI->getOperand(opNum + 1).getImm() == 0)
2202 return; // don't print ',0' or '+0'
2203 O << "+";
2204 printOperand(MI, opNum + 1, O);
2205 }
2206}
2207
2208// Force static initialization.
2212}
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:127
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Looks at all the uses of the given value Returns the Liveness deduced from the uses of this value Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses If the result is MaybeLiveUses might be modified but its content should be ignored(since it might not be complete). DeadArgumentEliminationPass
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
static bool isEmptyXXStructor(GlobalVariable *GV)
#define DEPOTNAME
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
static bool usedInGlobalVarDef(const Constant *C)
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const char * name
Definition: SMEABIPass.cpp:49
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
@ Globals
Definition: TextStubV5.cpp:115
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5450
APInt bitcastToAPInt() const
Definition: APFloat.h:1184
Class for arbitrary precision integers.
Definition: APInt.h:75
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:605
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1494
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:846
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:662
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:398
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:87
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:90
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:102
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:428
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:375
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:121
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:105
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:94
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:399
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:99
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:383
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:402
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:393
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
ConstantArray - Constant Array Declarations.
Definition: Constants.h:413
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:573
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1002
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:2058
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:260
const APFloat & getValueAPF() const
Definition: Constants.h:301
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:145
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:136
This is an important base class in LLVM.
Definition: Constant.h:41
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:117
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:109
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:510
bool hasExternalLinkage() const
Definition: GlobalValue.h:506
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:275
bool hasPrivateLinkage() const
Definition: GlobalValue.h:522
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
bool hasInternalLinkage() const
Definition: GlobalValue.h:521
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:290
bool hasWeakLinkage() const
Definition: GlobalValue.h:517
bool hasCommonLinkage() const
Definition: GlobalValue.h:527
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:507
Type * getValueType() const
Definition: GlobalValue.h:292
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
Definition: MCAsmInfo.h:550
Binary assembler expressions.
Definition: MCExpr.h:481
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:628
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:631
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:530
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:525
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:625
@ Add
Addition.
Definition: MCExpr.h:484
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
Context object for machine code objects.
Definition: MCContext.h:76
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:201
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
@ Unary
Unary expressions.
Definition: MCExpr.h:41
@ Constant
Constant expressions.
Definition: MCExpr.h:39
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
@ Target
Target specific expression.
Definition: MCExpr.h:42
@ Binary
Binary expressions.
Definition: MCExpr.h:38
ExprKind getKind() const
Definition: MCExpr.h:81
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
const MCSymbol & getSymbol() const
Definition: MCExpr.h:399
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:386
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:58
Unary assembler expressions.
Definition: MCExpr.h:425
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:468
@ Minus
Unary minus.
Definition: MCExpr.h:429
@ Plus
Unary plus.
Definition: MCExpr.h:431
@ Not
Bitwise negation.
Definition: MCExpr.h:430
@ LNot
Logical negation.
Definition: MCExpr.h:428
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:471
Metadata node.
Definition: Metadata.h:943
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
bool hasDebugInfo() const
Returns true if valid debug info is present.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:43
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:48
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:53
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:54
static const char * getRegisterName(MCRegister Reg)
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
bool hasImageHandles() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
UniqueStringSaver & getStrPool() const
Implements NVPTX-specific streamer.
void closeLastSection()
Close last section.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:682
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool startswith(StringRef Prefix) const
Definition: StringRef.h:261
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Class to represent struct types.
Definition: DerivedTypes.h:213
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
unsigned getPointerSizeInBits(unsigned AS) const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:267
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:258
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition: Type.h:75
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ VoidTyID
type with no size
Definition: Type.h:63
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ StructTyID
Structures.
Definition: Type.h:74
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition: Type.h:57
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ PointerTyID
Pointers.
Definition: Type.h:73
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:297
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:246
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:231
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
StringRef save(const char *S)
Definition: StringSaver.h:52
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool erase(const ValueT &V)
Definition: DenseSet.h:101
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:672
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ NVCL
Definition: NVPTX.h:76
@ CUDA
Definition: NVPTX.h:77
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
constexpr double e
Definition: MathExtras.h:31
uint64_t read64le(const void *P)
Definition: Endian.h:382
uint32_t read32le(const void *P)
Definition: Endian.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1777
std::string getSamplerName(const Value &val)
AddressSpace
Definition: NVPTXBaseInfo.h:21
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
bool getAlign(const Function &F, unsigned index, unsigned &align)
bool getMinCTASm(const Function &F, unsigned &x)
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isImage(const Value &val)
bool getMaxNTIDz(const Function &F, unsigned &z)
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
bool isManaged(const Value &val)
unsigned promoteScalarArgumentSize(unsigned size)
bool isSurface(const Value &val)
void clearAnnotationCache(const Module *Mod)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
std::string getSurfaceName(const Value &val)
bool getReqNTIDx(const Function &F, unsigned &x)
bool getReqNTIDy(const Function &F, unsigned &y)
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:199
bool getMaxNReg(const Function &F, unsigned &x)
bool isTexture(const Value &val)
bool isImageWriteOnly(const Value &val)
bool isImageReadWrite(const Value &val)
void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
std::string getTextureName(const Value &val)
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
bool getReqNTIDz(const Function &F, unsigned &z)
bool getMaxNTIDx(const Function &F, unsigned &x)
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:121
bool getMaxNTIDy(const Function &F, unsigned &y)
bool isSampler(const Value &val)
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:901
Target & getTheNVPTXTarget32()
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:256
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:225
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:259
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:351
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...