//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

37 "amdhsa-code-object-version", llvm::cl::Hidden,
39 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
40 "or asm directive still take priority if present)"));
41
namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
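
// Illustrative example (not part of the original source): packBits and
// unpackBits round-trip a field while preserving the other bits of Dst.
// With Shift = 4 and Width = 3 the mask is 0x70, so
//   packBits(/*Src=*/5, /*Dst=*/0x8f, 4, 3) == 0xdf  // 0x50 | 0x8f
//   unpackBits(0xdf, 4, 3) == 5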

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns Loadcnt bit width.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VmVsrc bit width.
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift.
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width.
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift.
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width.
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift.
inline unsigned getSaSdstBitShift() { return 0; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdhsa_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
    return 6;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support both A16 (as an operand) and G16 (selected
  // by a different instruction encoding), the two are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, and we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
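
// Worked example (not from the original source): a 2D sample with gradients
// has Dim->NumCoords == 2 and Dim->NumGradients == 4. With no extra arguments
// or LOD, full precision needs 2 + 4 = 6 address words. With IsA16 set on a
// subtarget lacking G16 support, coordinates pack into divideCeil(2, 2) == 1
// word and gradients into alignTo<2>(4 / 2) == 2 words, i.e. 3 words total.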

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
  bool tfe;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

struct SingleUseExceptionInfo {
  uint16_t Opcode;
  bool IsInvalidSingleUseConsumer;
  bool IsInvalidSingleUseProducer;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SingleUseExceptionTable_DECL
#define GET_SingleUseExceptionTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getMUBUFTfe(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->tfe : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  else
    return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
}

bool isInvalidSingleUseConsumerInst(unsigned Opc) {
  const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
  return Info && Info->IsInvalidSingleUseConsumer;
}

bool isInvalidSingleUseProducerInst(unsigned Opc) {
  const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
  return Info && Info->IsInvalidSingleUseProducer;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  auto OpX = getVOPDBaseFromComponent(Info->OpX);
  auto OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;

  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

  const unsigned CompOprNum =
      SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
    unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
        ((OpXRegs[CompOprIdx] & BanksMasks) ==
         (OpYRegs[CompOprIdx] & BanksMasks)))
      return CompOprIdx;
  }

  return {};
}
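
// Illustrative note (not part of the original source): the bank mask selects
// the low VGPR index bits that must differ between the X and Y components.
// For instance, if a source slot's mask is 3 (four banks), v1 and v5 conflict
// because (1 & 3) == (5 & 3), while v1 and v6 do not.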

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices InstInfo::getRegIndices(
    unsigned CompIdx,
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
            : 0;
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting will remain "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting will remain
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: The following else statement is present because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is the same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}
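
// Illustrative example (not part of the original source): for an amdhsa
// gfx90a target with sramecc enabled and xnack disabled, toString() produces
// "amdgcn-amd-amdhsa--gfx90a:sramecc+:xnack-".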

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = 0;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    BytesPerCU = 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    BytesPerCU = 65536;

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}
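
// Worked example (not from the original source): pre-gfx10 with 64-wide
// waves, MaxWaves == getMaxWavesPerEU() * getEUsPerCU() == 10 * 4 == 40. A
// flat workgroup size of 256 gives N == 4 waves per group, so at most
// min(40 / 4, 16) == 10 workgroups fit on a CU.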

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}
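
// Worked example (not from the original source): on gfx8 (800 SGPRs total,
// granule 16, no trap handler), WavesPerEU == 8 gives 800 / 9 == 88, aligned
// down to 80, plus 1 == 81: a wave must use at least 81 SGPRs (allocating 96)
// before an EU can no longer fit a 9th wave.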

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  return divideCeil(std::max(1u, NumRegs), Granule);
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
         1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs) {
  return getNumWavesPerEUWithNumVGPRs(NumVGPRs, getVGPRAllocGranule(STI),
                                      getMaxWavesPerEU(STI),
                                      getTotalNumVGPRs(STI));
}

unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs) {
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
}
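
// Worked example (not from the original source): on gfx9 with 64-wide waves
// (Granule == 4, TotalNumVGPRs == 256, MaxWaves == 10), a kernel using 100
// VGPRs rounds to 100 and 256 / 100 == 2 waves fit per EU; using 25 VGPRs
// rounds to 28 and allows min(256 / 28, 10) == 9 waves.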

unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen) {
  if (Gen >= AMDGPUSubtarget::GFX10)
    return MaxWaves;

  if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                 std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
         1;
}

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
                                   unsigned NumVGPRs,
                                   std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
      NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32));
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &KernelCode,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  KernelCode.amd_kernel_code_version_major = 1;
  KernelCode.amd_kernel_code_version_minor = 2;
  KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  KernelCode.amd_machine_version_major = Version.Major;
  KernelCode.amd_machine_version_minor = Version.Minor;
  KernelCode.amd_machine_version_stepping = Version.Stepping;
  KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
  if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
    KernelCode.wavefront_size = 5;
    KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
  } else {
    KernelCode.wavefront_size = 6;
  }

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  KernelCode.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  KernelCode.kernarg_segment_alignment = 4;
  KernelCode.group_segment_alignment = 4;
  KernelCode.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    KernelCode.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<unsigned, unsigned> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
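
// Illustrative example (not part of the original source): an attribute such
// as "amdgpu-flat-work-group-size"="128,256" parses to {128, 256}. A bare
// "128" is accepted when OnlyFirstRequired is true, leaving the second value
// at its default.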

SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size) {
  assert(Size > 2);
  SmallVector<unsigned> Default(Size, 0);

  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  SmallVector<unsigned> Vals(Size, 0);

  LLVMContext &Ctx = F.getContext();

  StringRef S = A.getValueAsString();
  unsigned i = 0;
  for (; !S.empty() && i < Size; i++) {
    std::pair<StringRef, StringRef> Strs = S.split(',');
    unsigned IntVal;
    if (Strs.first.trim().getAsInteger(0, IntVal)) {
      Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
                    Name);
      return Default;
    }
    Vals[i] = IntVal;
    S = Strs.second;
  }

  if (!S.empty() || i < Size) {
    Ctx.emitError("attribute " + Name +
                  " has incorrect number of integers; expected " +
                  llvm::utostr(Size));
    return Default;
  }
  return Vals;
}


unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getLoadcntBitMask(const IsaVersion &Version) {
  return (1 << getLoadcntBitWidth(Version.Major)) - 1;
}

unsigned getSamplecntBitMask(const IsaVersion &Version) {
  return (1 << getSamplecntBitWidth(Version.Major)) - 1;
}

unsigned getBvhcntBitMask(const IsaVersion &Version) {
  return (1 << getBvhcntBitWidth(Version.Major)) - 1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getDscntBitMask(const IsaVersion &Version) {
  return (1 << getDscntBitWidth(Version.Major)) - 1;
}

unsigned getKmcntBitMask(const IsaVersion &Version) {
  return (1 << getKmcntBitWidth(Version.Major)) - 1;
}

unsigned getStorecntBitMask(const IsaVersion &Version) {
  return (1 << getStorecntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}
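
// Illustrative example (not part of the original source): on gfx9 vmcnt is
// split into bits [3:0] and [15:14] of the s_waitcnt immediate. A vmcnt of
// 35 (0b100011) stores 0b0011 in the low field and 0b10 in the high field;
// decodeVmcnt reassembles (0b10 << 4) | 0b0011 == 35.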

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
}
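
// Illustrative usage (not part of the original source): fields that should
// not be waited on are left at their all-ones "no wait" value, so
//   encodeWaitcnt(Version, /*Vmcnt=*/0, getExpcntBitMask(Version),
//                 getLgkmcntBitMask(Version))
// encodes a wait on vmcnt only, and decodeVmcnt() of the result returns 0.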

static unsigned getCombinedCountBitMask(const IsaVersion &Version,
                                        bool IsStore) {
  unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
                              getDscntBitWidth(Version.Major));
  if (IsStore) {
    unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                   getStorecntBitWidth(Version.Major));
    return Dscnt | Storecnt;
  } else {
    unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                  getLoadcntBitWidth(Version.Major));
    return Dscnt | Loadcnt;
  }
}

Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
  Waitcnt Decoded;
  Decoded.LoadCnt =
      unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getLoadcntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
  Waitcnt Decoded;
  Decoded.StoreCnt =
      unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getStorecntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
                              unsigned Loadcnt) {
  return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getLoadcntBitWidth(Version.Major));
}

static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
                               unsigned Storecnt) {
  return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getStorecntBitWidth(Version.Major));
}

static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
                            unsigned Dscnt) {
  return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
                  getDscntBitWidth(Version.Major));
}

static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
                                   unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, false);
  Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
}

static unsigned encodeStorecntDscnt(const IsaVersion &Version,
                                    unsigned Storecnt, unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, true);
  Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeStorecntDscnt(const IsaVersion &Version,
                             const Waitcnt &Decoded) {
  return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
}

//===----------------------------------------------------------------------===//
// Custom Operand Values
//===----------------------------------------------------------------------===//

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
                                                int Size,
                                                const MCSubtargetInfo &STI) {
  unsigned Enc = 0;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.isSupported(STI))
      Enc |= Op.encode(Op.Default);
  }
  return Enc;
}

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}

static int encodeCustomOperandVal(const CustomOperandVal &Op,
                                  int64_t InputVal) {
  if (InputVal < 0 || InputVal > Op.Max)
    return OPR_VAL_INVALID;
  return Op.encode(InputVal);
}

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}

//===----------------------------------------------------------------------===//
// DepCtr
//===----------------------------------------------------------------------===//

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
  static int Default = -1;
  if (Default == -1)
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
  return Default;
}

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}

unsigned decodeFieldVmVsrc(unsigned Encoded) {
  return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned decodeFieldVaVdst(unsigned Encoded) {
  return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned decodeFieldSaSdst(unsigned Encoded) {
  return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
  return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
  return encodeFieldVmVsrc(0xffff, VmVsrc);
}

unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
  return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned encodeFieldVaVdst(unsigned VaVdst) {
  return encodeFieldVaVdst(0xffff, VaVdst);
}

unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
  return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldSaSdst(unsigned SaSdst) {
  return encodeFieldSaSdst(0xffff, SaSdst);
}
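
// Illustrative usage (not part of the original source): the single-argument
// forms start from the all-ones (no wait) value 0xffff, so
//   encodeFieldVaVdst(encodeFieldVmVsrc(0xffff, 0), 0)
// clears both the vm_vsrc and va_vdst fields, producing an operand that
// waits for both counters to drain while leaving other fields untouched.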

} // namespace DepCtr

//===----------------------------------------------------------------------===//
// exp tgt
//===----------------------------------------------------------------------===//

namespace Exp {

struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

static constexpr ExpTgt ExpTgtInfo[] = {
    {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
    {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
    {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
    {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
    {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
    {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
    {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
};

bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {

  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disallow leading zeroes.
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}
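
// Illustrative example (not part of the original source): getTgtId("pos3")
// returns ET_POS0 + 3, while "pos03" (leading zero) and "pos5" (index beyond
// ET_POS_MAX_IDX) both return ET_INVALID.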

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ET_NULL:
    return !isGFX11Plus(STI);
  case ET_POS4:
  case ET_PRIM:
    return isGFX10Plus(STI);
  case ET_DUAL_SRC_BLEND0:
  case ET_DUAL_SRC_BLEND1:
    return isGFX11Plus(STI);
  default:
    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
      return !isGFX11Plus(STI);
    return true;
  }
}

} // namespace Exp

//===----------------------------------------------------------------------===//
// MTBUF Format
//===----------------------------------------------------------------------===//

namespace MTBUFFormat {

int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}

static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  auto lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}
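
// Illustrative example (not part of the original source): the two fields are
// packed independently, so
//   unsigned Dfmt, Nfmt;
//   decodeDfmtNfmt(encodeDfmtNfmt(DFMT_32_32, NFMT_FLOAT), Dfmt, Nfmt);
//   // Dfmt == DFMT_32_32, Nfmt == NFMT_FLOAT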

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
  if (isValidUnifiedFormat(Id, STI))
    return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
  return "";
}

bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
  return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
}

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI) {
  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
}

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
  if (isGFX10Plus(STI))
    return UFMT_DEFAULT;
  return DFMT_NFMT_DEFAULT;
}

} // namespace MTBUFFormat

//===----------------------------------------------------------------------===//
// SendMsg
//===----------------------------------------------------------------------===//

namespace SendMsg {

static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
}

bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
  return (MsgId & ~(getMsgIdMask(STI))) == 0;
}

bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict) {
  assert(isValidMsgId(MsgId, STI));

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  if (msgRequiresOp(MsgId, STI)) {
    if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
      return false;

    return !getMsgOpName(MsgId, OpId, STI).empty();
  }

  return OpId == OP_NONE_;
}

bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict) {
  assert(isValidMsgOp(MsgId, OpId, STI, Strict));

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  if (!isGFX11Plus(STI)) {
    switch (MsgId) {
    case ID_GS_PreGFX11:
      return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
    case ID_GS_DONE_PreGFX11:
      return (OpId == OP_GS_NOP) ?
          (StreamId == STREAM_ID_NONE_) :
          (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
    }
  }
  return StreamId == STREAM_ID_NONE_;
}

bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
  return MsgId == ID_SYSMSG ||
         (!isGFX11Plus(STI) &&
          (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
}

bool msgSupportsStream(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  return !isGFX11Plus(STI) &&
         (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
         OpId != OP_GS_NOP;
}

void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI) {
  MsgId = Val & getMsgIdMask(STI);
  if (isGFX11Plus(STI)) {
    OpId = 0;
    StreamId = 0;
  } else {
    OpId = (Val & OP_MASK_) >> OP_SHIFT_;
    StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
  }
}

uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId) {
  return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
}
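
// Illustrative example (not part of the original source): on pre-GFX11
// targets encodeMsg(ID_GS_PreGFX11, OP_GS_EMIT, 1) ORs the message id with
// the op and stream fields at OP_SHIFT_ and STREAM_ID_SHIFT_; decodeMsg
// recovers the same three values.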
1969
1970} // namespace SendMsg
1971
1972//===----------------------------------------------------------------------===//
1973//
1974//===----------------------------------------------------------------------===//
1975
1977 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
1978}
1979
1981 // As a safe default always respond as if PS has color exports.
1982 return F.getFnAttributeAsParsedInteger(
1983 "amdgpu-color-export",
1984 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
1985}
1986
1988 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
1989}
1990
1992 switch(cc) {
2002 return true;
2003 default:
2004 return false;
2005 }
2006}
2007
2009 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
2010}
2011
2013 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
2014}
2015
2016bool isEntryFunctionCC(CallingConv::ID CC) {
2017 switch (CC) {
2018 case CallingConv::AMDGPU_KERNEL:
2019 case CallingConv::SPIR_KERNEL:
2020 case CallingConv::AMDGPU_VS:
2021 case CallingConv::AMDGPU_GS:
2022 case CallingConv::AMDGPU_PS:
2023 case CallingConv::AMDGPU_CS:
2024 case CallingConv::AMDGPU_ES:
2025 case CallingConv::AMDGPU_HS:
2026 case CallingConv::AMDGPU_LS:
2027 return true;
2028 default:
2029 return false;
2030 }
2031}
2032
2033bool isModuleEntryFunctionCC(CallingConv::ID CC) {
2034 switch (CC) {
2035 case CallingConv::AMDGPU_Gfx:
2036 return true;
2037 default:
2038 return isEntryFunctionCC(CC) || isChainCC(CC);
2039 }
2040}
2041
2042bool isChainCC(CallingConv::ID CC) {
2043 switch (CC) {
2044 case CallingConv::AMDGPU_CS_Chain:
2045 case CallingConv::AMDGPU_CS_ChainPreserve:
2046 return true;
2047 default:
2048 return false;
2049 }
2050}
2051
2052bool isKernelCC(const Function *Func) {
2053 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2054}
2055
2056bool hasXNACK(const MCSubtargetInfo &STI) {
2057 return STI.hasFeature(AMDGPU::FeatureXNACK);
2058}
2059
2060bool hasSRAMECC(const MCSubtargetInfo &STI) {
2061 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2062}
2063
2064bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2065 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
2066}
2067
2068bool hasA16(const MCSubtargetInfo &STI) {
2069 return STI.hasFeature(AMDGPU::FeatureA16);
2070}
2071
2072bool hasG16(const MCSubtargetInfo &STI) {
2073 return STI.hasFeature(AMDGPU::FeatureG16);
2074}
2075
2076bool hasPackedD16(const MCSubtargetInfo &STI) {
2077 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2078 !isSI(STI);
2079}
2080
2081bool hasGDS(const MCSubtargetInfo &STI) {
2082 return STI.hasFeature(AMDGPU::FeatureGDS);
2083}
2084
2085unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2086 auto Version = getIsaVersion(STI.getCPU());
2087 if (Version.Major == 10)
2088 return Version.Minor >= 3 ? 13 : 5;
2089 if (Version.Major == 11)
2090 return 5;
2091 if (Version.Major >= 12)
2092 return HasSampler ? 4 : 5;
2093 return 0;
2094}
2095
2096unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2097
2098bool isSI(const MCSubtargetInfo &STI) {
2099 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2100}
2101
2102bool isCI(const MCSubtargetInfo &STI) {
2103 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2104}
2105
2106bool isVI(const MCSubtargetInfo &STI) {
2107 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2108}
2109
2110bool isGFX9(const MCSubtargetInfo &STI) {
2111 return STI.hasFeature(AMDGPU::FeatureGFX9);
2112}
2113
2114bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2115 return isGFX9(STI) || isGFX10(STI);
2116}
2117
2118bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2119 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2120}
2121
2122bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2123 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2124}
2125
2126bool isGFX8Plus(const MCSubtargetInfo &STI) {
2127 return isVI(STI) || isGFX9Plus(STI);
2128}
2129
2130bool isGFX9Plus(const MCSubtargetInfo &STI) {
2131 return isGFX9(STI) || isGFX10Plus(STI);
2132}
2133
2134bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2135
2136bool isGFX10(const MCSubtargetInfo &STI) {
2137 return STI.hasFeature(AMDGPU::FeatureGFX10);
2138}
2139
2140bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2141 return isGFX10(STI) || isGFX11(STI);
2142}
2143
2144bool isGFX10Plus(const MCSubtargetInfo &STI) {
2145 return isGFX10(STI) || isGFX11Plus(STI);
2146}
2147
2148bool isGFX11(const MCSubtargetInfo &STI) {
2149 return STI.hasFeature(AMDGPU::FeatureGFX11);
2150}
2151
2152bool isGFX11Plus(const MCSubtargetInfo &STI) {
2153 return isGFX11(STI) || isGFX12Plus(STI);
2154}
2155
2156bool isGFX12(const MCSubtargetInfo &STI) {
2157 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2158}
2159
2160bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2161
2162bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2163
2164bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2165 return !isGFX11Plus(STI);
2166}
2167
2168bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2169 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2170}
2171
2172bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2173 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2174}
2175
2176bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2177 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2178}
2179
2180bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2181 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2182}
2183
2184bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2185 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2186}
2187
2188bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2189 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2190}
2191
2192bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2193 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2194}
2195
2196bool isGFX90A(const MCSubtargetInfo &STI) {
2197 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2198}
2199
2200bool isGFX940(const MCSubtargetInfo &STI) {
2201 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2202}
2203
2204bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2205 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2206}
2207
2208bool hasMAIInsts(const MCSubtargetInfo &STI) {
2209 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2210}
2211
2212bool hasVOPD(const MCSubtargetInfo &STI) {
2213 return STI.hasFeature(AMDGPU::FeatureVOPD);
2214}
2215
2216bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2217 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2218}
2219
2220unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2221 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2222}
2223
2224int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2225 int32_t ArgNumVGPR) {
2226 if (has90AInsts && ArgNumAGPR)
2227 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2228 return std::max(ArgNumVGPR, ArgNumAGPR);
2229}
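// Worked example: with has90AInsts, ArgNumVGPR == 6 and ArgNumAGPR == 4, the
// AGPR block must start on a 4-register boundary, so the total is
// alignTo(6, 4) + 4 == 12; without gfx90a instructions the result is just
// max(6, 4) == 6.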
2230
2231bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2232 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2233 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2234 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2235 Reg == AMDGPU::SCC;
2236}
2237
2238bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
2239 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
2240}
2241
2242#define MAP_REG2REG \
2243 using namespace AMDGPU; \
2244 switch(Reg) { \
2245 default: return Reg; \
2246 CASE_CI_VI(FLAT_SCR) \
2247 CASE_CI_VI(FLAT_SCR_LO) \
2248 CASE_CI_VI(FLAT_SCR_HI) \
2249 CASE_VI_GFX9PLUS(TTMP0) \
2250 CASE_VI_GFX9PLUS(TTMP1) \
2251 CASE_VI_GFX9PLUS(TTMP2) \
2252 CASE_VI_GFX9PLUS(TTMP3) \
2253 CASE_VI_GFX9PLUS(TTMP4) \
2254 CASE_VI_GFX9PLUS(TTMP5) \
2255 CASE_VI_GFX9PLUS(TTMP6) \
2256 CASE_VI_GFX9PLUS(TTMP7) \
2257 CASE_VI_GFX9PLUS(TTMP8) \
2258 CASE_VI_GFX9PLUS(TTMP9) \
2259 CASE_VI_GFX9PLUS(TTMP10) \
2260 CASE_VI_GFX9PLUS(TTMP11) \
2261 CASE_VI_GFX9PLUS(TTMP12) \
2262 CASE_VI_GFX9PLUS(TTMP13) \
2263 CASE_VI_GFX9PLUS(TTMP14) \
2264 CASE_VI_GFX9PLUS(TTMP15) \
2265 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2266 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2267 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2268 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2269 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2270 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2271 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2272 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2273 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2274 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2275 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2276 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2277 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2278 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2279 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2280 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2281 CASE_GFXPRE11_GFX11PLUS(M0) \
2282 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2283 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2284 }
2285
2286#define CASE_CI_VI(node) \
2287 assert(!isSI(STI)); \
2288 case node: return isCI(STI) ? node##_ci : node##_vi;
2289
2290#define CASE_VI_GFX9PLUS(node) \
2291 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2292
2293#define CASE_GFXPRE11_GFX11PLUS(node) \
2294 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2295
2296#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2297 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2298
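// How the mapping works: inside getMCReg() below, MAP_REG2REG expands to a
// switch over Reg in which each CASE_* macro above emits one pseudo-to-MC
// translation; e.g. CASE_VI_GFX9PLUS(TTMP0) becomes
// "case TTMP0: return isGFX9Plus(STI) ? TTMP0_gfx9plus : TTMP0_vi;".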
2299unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2300 if (STI.getTargetTriple().getArch() == Triple::r600)
2301 return Reg;
2302 MAP_REG2REG
2303}
2304
2305#undef CASE_CI_VI
2306#undef CASE_VI_GFX9PLUS
2307#undef CASE_GFXPRE11_GFX11PLUS
2308#undef CASE_GFXPRE11_GFX11PLUS_TO
2309
2310#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2311#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2312#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2313#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2314
2315unsigned mc2PseudoReg(unsigned Reg) {
2316 MAP_REG2REG
2317}
2318
2319bool isInlineValue(unsigned Reg) {
2320 switch (Reg) {
2321 case AMDGPU::SRC_SHARED_BASE_LO:
2322 case AMDGPU::SRC_SHARED_BASE:
2323 case AMDGPU::SRC_SHARED_LIMIT_LO:
2324 case AMDGPU::SRC_SHARED_LIMIT:
2325 case AMDGPU::SRC_PRIVATE_BASE_LO:
2326 case AMDGPU::SRC_PRIVATE_BASE:
2327 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2328 case AMDGPU::SRC_PRIVATE_LIMIT:
2329 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2330 return true;
2331 case AMDGPU::SRC_VCCZ:
2332 case AMDGPU::SRC_EXECZ:
2333 case AMDGPU::SRC_SCC:
2334 return true;
2335 case AMDGPU::SGPR_NULL:
2336 return true;
2337 default:
2338 return false;
2339 }
2340}
2341
2342#undef CASE_CI_VI
2343#undef CASE_VI_GFX9PLUS
2344#undef CASE_GFXPRE11_GFX11PLUS
2345#undef CASE_GFXPRE11_GFX11PLUS_TO
2346#undef MAP_REG2REG
2347
2348bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2349 assert(OpNo < Desc.NumOperands);
2350 unsigned OpType = Desc.operands()[OpNo].OperandType;
2351 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2352 OpType <= AMDGPU::OPERAND_SRC_LAST;
2353}
2354
2355bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2356 assert(OpNo < Desc.NumOperands);
2357 unsigned OpType = Desc.operands()[OpNo].OperandType;
2358 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2359 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2360}
2361
2362bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2363 assert(OpNo < Desc.NumOperands);
2364 unsigned OpType = Desc.operands()[OpNo].OperandType;
2365 switch (OpType) {
2366 case AMDGPU::OPERAND_REG_IMM_FP32:
2367 case AMDGPU::OPERAND_REG_IMM_FP64:
2368 case AMDGPU::OPERAND_REG_IMM_FP16:
2369 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2370 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2371 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2372 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2373 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2374 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2375 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2376 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2377 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2378 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2379 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2380 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2381 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2382 return true;
2383 default:
2384 return false;
2385 }
2386}
2387
2388bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2389 assert(OpNo < Desc.NumOperands);
2390 unsigned OpType = Desc.operands()[OpNo].OperandType;
2391 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2392 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2393 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2394 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2395}
2396
2397// Avoid using MCRegisterClass::getSize, since that function will go away
2398// (move from MC* level to Target* level). Return size in bits.
2399unsigned getRegBitWidth(unsigned RCID) {
2400 switch (RCID) {
2401 case AMDGPU::SGPR_LO16RegClassID:
2402 case AMDGPU::AGPR_LO16RegClassID:
2403 return 16;
2404 case AMDGPU::SGPR_32RegClassID:
2405 case AMDGPU::VGPR_32RegClassID:
2406 case AMDGPU::VRegOrLds_32RegClassID:
2407 case AMDGPU::AGPR_32RegClassID:
2408 case AMDGPU::VS_32RegClassID:
2409 case AMDGPU::AV_32RegClassID:
2410 case AMDGPU::SReg_32RegClassID:
2411 case AMDGPU::SReg_32_XM0RegClassID:
2412 case AMDGPU::SRegOrLds_32RegClassID:
2413 return 32;
2414 case AMDGPU::SGPR_64RegClassID:
2415 case AMDGPU::VS_64RegClassID:
2416 case AMDGPU::SReg_64RegClassID:
2417 case AMDGPU::VReg_64RegClassID:
2418 case AMDGPU::AReg_64RegClassID:
2419 case AMDGPU::SReg_64_XEXECRegClassID:
2420 case AMDGPU::VReg_64_Align2RegClassID:
2421 case AMDGPU::AReg_64_Align2RegClassID:
2422 case AMDGPU::AV_64RegClassID:
2423 case AMDGPU::AV_64_Align2RegClassID:
2424 return 64;
2425 case AMDGPU::SGPR_96RegClassID:
2426 case AMDGPU::SReg_96RegClassID:
2427 case AMDGPU::VReg_96RegClassID:
2428 case AMDGPU::AReg_96RegClassID:
2429 case AMDGPU::VReg_96_Align2RegClassID:
2430 case AMDGPU::AReg_96_Align2RegClassID:
2431 case AMDGPU::AV_96RegClassID:
2432 case AMDGPU::AV_96_Align2RegClassID:
2433 return 96;
2434 case AMDGPU::SGPR_128RegClassID:
2435 case AMDGPU::SReg_128RegClassID:
2436 case AMDGPU::VReg_128RegClassID:
2437 case AMDGPU::AReg_128RegClassID:
2438 case AMDGPU::VReg_128_Align2RegClassID:
2439 case AMDGPU::AReg_128_Align2RegClassID:
2440 case AMDGPU::AV_128RegClassID:
2441 case AMDGPU::AV_128_Align2RegClassID:
2442 return 128;
2443 case AMDGPU::SGPR_160RegClassID:
2444 case AMDGPU::SReg_160RegClassID:
2445 case AMDGPU::VReg_160RegClassID:
2446 case AMDGPU::AReg_160RegClassID:
2447 case AMDGPU::VReg_160_Align2RegClassID:
2448 case AMDGPU::AReg_160_Align2RegClassID:
2449 case AMDGPU::AV_160RegClassID:
2450 case AMDGPU::AV_160_Align2RegClassID:
2451 return 160;
2452 case AMDGPU::SGPR_192RegClassID:
2453 case AMDGPU::SReg_192RegClassID:
2454 case AMDGPU::VReg_192RegClassID:
2455 case AMDGPU::AReg_192RegClassID:
2456 case AMDGPU::VReg_192_Align2RegClassID:
2457 case AMDGPU::AReg_192_Align2RegClassID:
2458 case AMDGPU::AV_192RegClassID:
2459 case AMDGPU::AV_192_Align2RegClassID:
2460 return 192;
2461 case AMDGPU::SGPR_224RegClassID:
2462 case AMDGPU::SReg_224RegClassID:
2463 case AMDGPU::VReg_224RegClassID:
2464 case AMDGPU::AReg_224RegClassID:
2465 case AMDGPU::VReg_224_Align2RegClassID:
2466 case AMDGPU::AReg_224_Align2RegClassID:
2467 case AMDGPU::AV_224RegClassID:
2468 case AMDGPU::AV_224_Align2RegClassID:
2469 return 224;
2470 case AMDGPU::SGPR_256RegClassID:
2471 case AMDGPU::SReg_256RegClassID:
2472 case AMDGPU::VReg_256RegClassID:
2473 case AMDGPU::AReg_256RegClassID:
2474 case AMDGPU::VReg_256_Align2RegClassID:
2475 case AMDGPU::AReg_256_Align2RegClassID:
2476 case AMDGPU::AV_256RegClassID:
2477 case AMDGPU::AV_256_Align2RegClassID:
2478 return 256;
2479 case AMDGPU::SGPR_288RegClassID:
2480 case AMDGPU::SReg_288RegClassID:
2481 case AMDGPU::VReg_288RegClassID:
2482 case AMDGPU::AReg_288RegClassID:
2483 case AMDGPU::VReg_288_Align2RegClassID:
2484 case AMDGPU::AReg_288_Align2RegClassID:
2485 case AMDGPU::AV_288RegClassID:
2486 case AMDGPU::AV_288_Align2RegClassID:
2487 return 288;
2488 case AMDGPU::SGPR_320RegClassID:
2489 case AMDGPU::SReg_320RegClassID:
2490 case AMDGPU::VReg_320RegClassID:
2491 case AMDGPU::AReg_320RegClassID:
2492 case AMDGPU::VReg_320_Align2RegClassID:
2493 case AMDGPU::AReg_320_Align2RegClassID:
2494 case AMDGPU::AV_320RegClassID:
2495 case AMDGPU::AV_320_Align2RegClassID:
2496 return 320;
2497 case AMDGPU::SGPR_352RegClassID:
2498 case AMDGPU::SReg_352RegClassID:
2499 case AMDGPU::VReg_352RegClassID:
2500 case AMDGPU::AReg_352RegClassID:
2501 case AMDGPU::VReg_352_Align2RegClassID:
2502 case AMDGPU::AReg_352_Align2RegClassID:
2503 case AMDGPU::AV_352RegClassID:
2504 case AMDGPU::AV_352_Align2RegClassID:
2505 return 352;
2506 case AMDGPU::SGPR_384RegClassID:
2507 case AMDGPU::SReg_384RegClassID:
2508 case AMDGPU::VReg_384RegClassID:
2509 case AMDGPU::AReg_384RegClassID:
2510 case AMDGPU::VReg_384_Align2RegClassID:
2511 case AMDGPU::AReg_384_Align2RegClassID:
2512 case AMDGPU::AV_384RegClassID:
2513 case AMDGPU::AV_384_Align2RegClassID:
2514 return 384;
2515 case AMDGPU::SGPR_512RegClassID:
2516 case AMDGPU::SReg_512RegClassID:
2517 case AMDGPU::VReg_512RegClassID:
2518 case AMDGPU::AReg_512RegClassID:
2519 case AMDGPU::VReg_512_Align2RegClassID:
2520 case AMDGPU::AReg_512_Align2RegClassID:
2521 case AMDGPU::AV_512RegClassID:
2522 case AMDGPU::AV_512_Align2RegClassID:
2523 return 512;
2524 case AMDGPU::SGPR_1024RegClassID:
2525 case AMDGPU::SReg_1024RegClassID:
2526 case AMDGPU::VReg_1024RegClassID:
2527 case AMDGPU::AReg_1024RegClassID:
2528 case AMDGPU::VReg_1024_Align2RegClassID:
2529 case AMDGPU::AReg_1024_Align2RegClassID:
2530 case AMDGPU::AV_1024RegClassID:
2531 case AMDGPU::AV_1024_Align2RegClassID:
2532 return 1024;
2533 default:
2534 llvm_unreachable("Unexpected register class");
2535 }
2536}
2537
2538unsigned getRegBitWidth(const MCRegisterClass &RC) {
2539 return getRegBitWidth(RC.getID());
2540}
2541
2542unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2543 unsigned OpNo) {
2544 assert(OpNo < Desc.NumOperands);
2545 unsigned RCID = Desc.operands()[OpNo].RegClass;
2546 return getRegBitWidth(RCID) / 8;
2547}
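// Worked example: an operand constrained to VReg_64 has getRegBitWidth() == 64,
// so getRegOperandSize() reports 64 / 8 == 8 bytes.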
2548
2549bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2550 if (isInlinableIntLiteral(Literal))
2551 return true;
2552
2553 uint64_t Val = static_cast<uint64_t>(Literal);
2554 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2555 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2556 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2557 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2558 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2559 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2560 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2561 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2562 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2563 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2564}
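// Worked example: bit_cast<uint64_t>(1.0) == 0x3FF0000000000000 matches the
// 1.0 entry above and is inlinable; 2.5 (0x4004000000000000) matches nothing
// and would have to be materialized as a literal instead.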
2565
2566bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2567 if (isInlinableIntLiteral(Literal))
2568 return true;
2569
2570 // The actual type of the operand does not seem to matter as long
2571 // as the bits match one of the inline immediate values. For example:
2572 //
2573 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2574 // so it is a legal inline immediate.
2575 //
2576 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2577 // floating-point, so it is a legal inline immediate.
2578
2579 uint32_t Val = static_cast<uint32_t>(Literal);
2580 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2581 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2582 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2583 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2584 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2585 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2586 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2587 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2588 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2589 (Val == 0x3e22f983 && HasInv2Pi);
2590}
2591
2592bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
2593 if (!HasInv2Pi)
2594 return false;
2595 if (isInlinableIntLiteral(Literal))
2596 return true;
2597 uint16_t Val = static_cast<uint16_t>(Literal);
2598 return Val == 0x3F00 || // 0.5
2599 Val == 0xBF00 || // -0.5
2600 Val == 0x3F80 || // 1.0
2601 Val == 0xBF80 || // -1.0
2602 Val == 0x4000 || // 2.0
2603 Val == 0xC000 || // -2.0
2604 Val == 0x4080 || // 4.0
2605 Val == 0xC080 || // -4.0
2606 Val == 0x3E22; // 1.0 / (2.0 * pi)
2607}
2608
2609bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
2610 return isInlinableLiteral32(Literal, HasInv2Pi);
2611}
2612
2613bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2614 if (!HasInv2Pi)
2615 return false;
2616 if (isInlinableIntLiteral(Literal))
2617 return true;
2618 uint16_t Val = static_cast<uint16_t>(Literal);
2619 return Val == 0x3C00 || // 1.0
2620 Val == 0xBC00 || // -1.0
2621 Val == 0x3800 || // 0.5
2622 Val == 0xB800 || // -0.5
2623 Val == 0x4000 || // 2.0
2624 Val == 0xC000 || // -2.0
2625 Val == 0x4400 || // 4.0
2626 Val == 0xC400 || // -4.0
2627 Val == 0x3118; // 1/2pi
2628}
2629
2630std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
2631 // Unfortunately, the Instruction Set Architecture Reference Guide is
2632 // misleading about how the inline operands work for (packed) 16-bit
2633 // instructions. In a nutshell, the actual HW behavior is:
2634 //
2635 // - integer encodings (-16 .. 64) are always produced as sign-extended
2636 // 32-bit values
2637 // - float encodings are produced as:
2638 // - for F16 instructions: corresponding half-precision float values in
2639 // the LSBs, 0 in the MSBs
2640 // - for UI16 instructions: corresponding single-precision float value
2641 int32_t Signed = static_cast<int32_t>(Literal);
2642 if (Signed >= 0 && Signed <= 64)
2643 return 128 + Signed;
2644
2645 if (Signed >= -16 && Signed <= -1)
2646 return 192 + std::abs(Signed);
2647
2648 if (IsFloat) {
2649 // clang-format off
2650 switch (Literal) {
2651 case 0x3800: return 240; // 0.5
2652 case 0xB800: return 241; // -0.5
2653 case 0x3C00: return 242; // 1.0
2654 case 0xBC00: return 243; // -1.0
2655 case 0x4000: return 244; // 2.0
2656 case 0xC000: return 245; // -2.0
2657 case 0x4400: return 246; // 4.0
2658 case 0xC400: return 247; // -4.0
2659 case 0x3118: return 248; // 1.0 / (2.0 * pi)
2660 default: break;
2661 }
2662 // clang-format on
2663 } else {
2664 // clang-format off
2665 switch (Literal) {
2666 case 0x3F000000: return 240; // 0.5
2667 case 0xBF000000: return 241; // -0.5
2668 case 0x3F800000: return 242; // 1.0
2669 case 0xBF800000: return 243; // -1.0
2670 case 0x40000000: return 244; // 2.0
2671 case 0xC0000000: return 245; // -2.0
2672 case 0x40800000: return 246; // 4.0
2673 case 0xC0800000: return 247; // -4.0
2674 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
2675 default: break;
2676 }
2677 // clang-format on
2678 }
2679
2680 return {};
2681}
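// Worked examples: Literal 5 falls in the non-negative integer range and
// encodes as 128 + 5 == 133; 0xFFFFFFFF is Signed == -1 and encodes as
// 192 + 1 == 193; for an F16 instruction the packed half-precision 1.0
// (0x3C00) maps to inline constant 242.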
2682
2683// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2684// or nullopt.
2685std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2686 return getInlineEncodingV216(false, Literal);
2687}
2688
2689// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
2690// or nullopt.
2691std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
2692 int32_t Signed = static_cast<int32_t>(Literal);
2693 if (Signed >= 0 && Signed <= 64)
2694 return 128 + Signed;
2695
2696 if (Signed >= -16 && Signed <= -1)
2697 return 192 + std::abs(Signed);
2698
2699 // clang-format off
2700 switch (Literal) {
2701 case 0x3F00: return 240; // 0.5
2702 case 0xBF00: return 241; // -0.5
2703 case 0x3F80: return 242; // 1.0
2704 case 0xBF80: return 243; // -1.0
2705 case 0x4000: return 244; // 2.0
2706 case 0xC000: return 245; // -2.0
2707 case 0x4080: return 246; // 4.0
2708 case 0xC080: return 247; // -4.0
2709 case 0x3E22: return 248; // 1.0 / (2.0 * pi)
2710 default: break;
2711 }
2712 // clang-format on
2713
2714 return std::nullopt;
2715}
2716
2717// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
2718// or nullopt.
2719std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
2720 return getInlineEncodingV216(true, Literal);
2721}
2722
2723// Whether the given literal can be inlined for a V_PK_* instruction.
2724bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
2725 switch (OpType) {
2726 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2727 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2728 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2729 return getInlineEncodingV216(false, Literal).has_value();
2730 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2731 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2732 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2733 return getInlineEncodingV216(true, Literal).has_value();
2734 case AMDGPU::OPERAND_REG_IMM_V2BF16:
2735 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2736 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
2737 return isInlinableLiteralV2BF16(Literal);
2738 default:
2739 llvm_unreachable("bad packed operand type");
2740 }
2741}
2742
2743// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2744bool isInlinableLiteralV2I16(uint32_t Literal) {
2745 return getInlineEncodingV2I16(Literal).has_value();
2746}
2747
2748// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
2749bool isInlinableLiteralV2BF16(uint32_t Literal) {
2750 return getInlineEncodingV2BF16(Literal).has_value();
2751}
2752
2753// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
2754bool isInlinableLiteralV2F16(uint32_t Literal) {
2755 return getInlineEncodingV2F16(Literal).has_value();
2756}
2757
2758bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2759 if (IsFP64)
2760 return !(Val & 0xffffffffu);
2761
2762 return isUInt<32>(Val) || isInt<32>(Val);
2763}
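// Worked example: only the high 32 bits of an FP64 literal are encoded, so
// 1.5 (0x3FF8000000000000, low word zero) is a valid 32-bit literal while
// 0x3FF8000000000001 is not.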
2764
2765bool isArgPassedInSGPR(const Argument *A) {
2766 const Function *F = A->getParent();
2767
2768 // Arguments to compute shaders are never a source of divergence.
2769 CallingConv::ID CC = F->getCallingConv();
2770 switch (CC) {
2771 case CallingConv::AMDGPU_KERNEL:
2772 case CallingConv::SPIR_KERNEL:
2773 return true;
2774 case CallingConv::AMDGPU_VS:
2775 case CallingConv::AMDGPU_LS:
2776 case CallingConv::AMDGPU_HS:
2777 case CallingConv::AMDGPU_ES:
2778 case CallingConv::AMDGPU_GS:
2779 case CallingConv::AMDGPU_PS:
2780 case CallingConv::AMDGPU_CS:
2781 case CallingConv::AMDGPU_Gfx:
2782 case CallingConv::AMDGPU_CS_Chain:
2783 case CallingConv::AMDGPU_CS_ChainPreserve:
2784 // For non-compute shaders, SGPR inputs are marked with either inreg or
2785 // byval. Everything else is in VGPRs.
2786 return A->hasAttribute(Attribute::InReg) ||
2787 A->hasAttribute(Attribute::ByVal);
2788 default:
2789 // TODO: treat i1 as divergent?
2790 return A->hasAttribute(Attribute::InReg);
2791 }
2792}
2793
2794bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2795 // Arguments to compute shaders are never a source of divergence.
2796 CallingConv::ID CC = CB->getCallingConv();
2797 switch (CC) {
2798 case CallingConv::AMDGPU_KERNEL:
2799 case CallingConv::SPIR_KERNEL:
2800 return true;
2801 case CallingConv::AMDGPU_VS:
2802 case CallingConv::AMDGPU_LS:
2803 case CallingConv::AMDGPU_HS:
2804 case CallingConv::AMDGPU_ES:
2805 case CallingConv::AMDGPU_GS:
2806 case CallingConv::AMDGPU_PS:
2807 case CallingConv::AMDGPU_CS:
2808 case CallingConv::AMDGPU_Gfx:
2809 case CallingConv::AMDGPU_CS_Chain:
2810 case CallingConv::AMDGPU_CS_ChainPreserve:
2811 // For non-compute shaders, SGPR inputs are marked with either inreg or
2812 // byval. Everything else is in VGPRs.
2813 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2814 CB->paramHasAttr(ArgNo, Attribute::ByVal);
2815 default:
2816 return CB->paramHasAttr(ArgNo, Attribute::InReg);
2817 }
2818}
2819
2820static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2821 return isGCN3Encoding(ST) || isGFX10Plus(ST);
2822}
2823
2824static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2825 return isGFX9Plus(ST);
2826}
2827
2828bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2829 int64_t EncodedOffset) {
2830 if (isGFX12Plus(ST))
2831 return isUInt<23>(EncodedOffset);
2832
2833 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2834 : isUInt<8>(EncodedOffset);
2835}
2836
2837bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2838 int64_t EncodedOffset,
2839 bool IsBuffer) {
2840 if (isGFX12Plus(ST))
2841 return isInt<24>(EncodedOffset);
2842
2843 return !IsBuffer &&
2844 hasSMRDSignedImmOffset(ST) &&
2845 isInt<21>(EncodedOffset);
2846}
2847
2848static bool isDwordAligned(uint64_t ByteOffset) {
2849 return (ByteOffset & 3) == 0;
2850}
2851
2852uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2853 uint64_t ByteOffset) {
2854 if (hasSMEMByteOffset(ST))
2855 return ByteOffset;
2856
2857 assert(isDwordAligned(ByteOffset));
2858 return ByteOffset >> 2;
2859}
2860
2861std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2862 int64_t ByteOffset, bool IsBuffer) {
2863 if (isGFX12Plus(ST)) // 24 bit signed offsets
2864 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2865 : std::nullopt;
2866
2867 // The signed version is always a byte offset.
2868 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2869 // Assume the sign bit is never used.
2870 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2871 : std::nullopt;
2872 }
2873
2874 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2875 return std::nullopt;
2876
2877 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2878 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2879 ? std::optional<int64_t>(EncodedOffset)
2880 : std::nullopt;
2881}
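// Worked example: on SI (no byte offsets, no signed immediates) a ByteOffset
// of 256 converts to 64 dwords and fits isUInt<8>, so it is encodable, while
// 2048 (512 dwords) overflows the 8-bit field and yields std::nullopt.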
2882
2883std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2884 int64_t ByteOffset) {
2885 if (!isCI(ST) || !isDwordAligned(ByteOffset))
2886 return std::nullopt;
2887
2888 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2889 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2890 : std::nullopt;
2891}
2892
2893unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2894 if (AMDGPU::isGFX10(ST))
2895 return 12;
2896
2897 if (AMDGPU::isGFX12(ST))
2898 return 24;
2899 return 13;
2900}
2901
2902namespace {
2903
2904struct SourceOfDivergence {
2905 unsigned Intr;
2906};
2907const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2908
2909struct AlwaysUniform {
2910 unsigned Intr;
2911};
2912const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2913
2914#define GET_SourcesOfDivergence_IMPL
2915#define GET_UniformIntrinsics_IMPL
2916#define GET_Gfx9BufferFormat_IMPL
2917#define GET_Gfx10BufferFormat_IMPL
2918#define GET_Gfx11PlusBufferFormat_IMPL
2919#include "AMDGPUGenSearchableTables.inc"
2920
2921} // end anonymous namespace
2922
2923bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2924 return lookupSourceOfDivergence(IntrID);
2925}
2926
2927bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2928 return lookupAlwaysUniform(IntrID);
2929}
2930
2931const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2932 uint8_t NumComponents,
2933 uint8_t NumFormat,
2934 const MCSubtargetInfo &STI) {
2935 return isGFX11Plus(STI)
2936 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2937 NumFormat)
2938 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2939 NumComponents, NumFormat)
2940 : getGfx9BufferFormatInfo(BitsPerComp,
2941 NumComponents, NumFormat);
2942}
2943
2944const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
2945 const MCSubtargetInfo &STI) {
2946 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2947 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2948 : getGfx9BufferFormatInfo(Format);
2949}
2950
2951bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
2952 for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
2953 OpName::src2 }) {
2954 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
2955 if (Idx == -1)
2956 continue;
2957
2958 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
2959 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
2960 return true;
2961 }
2962
2963 return false;
2964}
2965
2966bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
2967 return hasAny64BitVGPROperands(OpDesc);
2968}
2969
2970unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
2971 // Currently this is 128 for all subtargets
2972 return 128;
2973}
2974
2975} // namespace AMDGPU
2976
2977raw_ostream &operator<<(raw_ostream &OS,
2978 const AMDGPU::IsaInfo::TargetIDSetting S) {
2979 switch (S) {
2980 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
2981 OS << "Unsupported";
2982 break;
2983 case (AMDGPU::IsaInfo::TargetIDSetting::Any):
2984 OS << "Any";
2985 break;
2986 case (AMDGPU::IsaInfo::TargetIDSetting::Off):
2987 OS << "Off";
2988 break;
2989 case (AMDGPU::IsaInfo::TargetIDSetting::On):
2990 OS << "On";
2991 break;
2992 }
2993 return OS;
2994}
2995
2996} // namespace llvm