LLVM 23.0.0git
AMDGPUBaseInfo.h
Go to the documentation of this file.
1//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "AMDGPUSubtarget.h"
13#include "SIDefines.h"
14#include "llvm/IR/CallingConv.h"
15#include "llvm/IR/InstrTypes.h"
16#include "llvm/IR/Module.h"
18#include <array>
19#include <functional>
20#include <utility>
21
22// Pull in OpName enum definition and getNamedOperandIdx() declaration.
23#define GET_INSTRINFO_OPERAND_ENUM
24#include "AMDGPUGenInstrInfo.inc"
25
27
28namespace llvm {
29
30struct Align;
31class Argument;
32class Function;
33class GlobalValue;
34class MachineInstr;
35class MCInstrInfo;
36class MCRegisterClass;
37class MCRegisterInfo;
38class MCSubtargetInfo;
39class MDNode;
40class StringRef;
41class Triple;
42class raw_ostream;
43
44namespace AMDGPU {
45
46struct AMDGPUMCKernelCodeT;
47struct IsaVersion;
48
/// Generic target versions emitted by this version of LLVM.
///
/// Each constant is the version number of one generic target family. These
/// numbers are incremented every time a codegen breaking change occurs
/// within a generic family.
namespace GenericVersion {
// `inline` gives each constant a single definition shared by every
// translation unit that includes this header, rather than one internal copy
// per translation unit.
inline constexpr unsigned GFX9 = 1;
inline constexpr unsigned GFX9_4 = 1;
inline constexpr unsigned GFX10_1 = 1;
inline constexpr unsigned GFX10_3 = 1;
inline constexpr unsigned GFX11 = 1;
inline constexpr unsigned GFX12 = 1;
inline constexpr unsigned GFX12_5 = 1;
} // namespace GenericVersion
62
63enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
64
65enum class FPType { None, FP4, FP8 };
66
67/// \returns True if \p STI is AMDHSA.
68bool isHsaAbi(const MCSubtargetInfo &STI);
69
70/// \returns Code object version from the IR module flag.
71unsigned getAMDHSACodeObjectVersion(const Module &M);
72
73/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
74unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
75
76/// \returns The default HSA code object version. This should only be used when
77/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
78/// flag or a .amdhsa_code_object_version directive)
80
81/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
82/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
83uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
84
85/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
86unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
87
88/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
89unsigned getHostcallImplicitArgPosition(unsigned COV);
90
/// NOTE(review): presumably \returns the offset of the default queue argument
/// from implicitarg_ptr for code object version \p COV, by analogy with the
/// documented *ImplicitArgPosition helpers above - confirm against the
/// definition.
91unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
/// NOTE(review): presumably \returns the offset of the completion action
/// argument from implicitarg_ptr for code object version \p COV - confirm
/// against the definition.
92unsigned getCompletionActionImplicitArgPosition(unsigned COV);
93
95 unsigned Format;
96 unsigned BitsPerComp;
97 unsigned NumComponents;
98 unsigned NumFormat;
99 unsigned DataFormat;
100};
101
107
114
118
120 unsigned T16Op;
121 unsigned HiOp;
122 unsigned LoOp;
123};
124
129
130#define GET_MIMGBaseOpcode_DECL
131#define GET_MIMGDim_DECL
132#define GET_MIMGEncoding_DECL
133#define GET_MIMGLZMapping_DECL
134#define GET_MIMGMIPMapping_DECL
135#define GET_MIMGBiASMapping_DECL
136#define GET_MAIInstInfoTable_DECL
137#define GET_isMFMA_F8F6F4Table_DECL
138#define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
139#define GET_True16D16Table_DECL
140#define GET_WMMAInstInfoTable_DECL
141#include "AMDGPUGenSearchableTables.inc"
142
143namespace IsaInfo {
144
145enum {
146 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
147 // doesn't spill SGPRs as much as when 80 is set.
150};
151
153
155private:
156 const MCSubtargetInfo &STI;
157 TargetIDSetting XnackSetting;
158 TargetIDSetting SramEccSetting;
159
160public:
161 explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
162 ~AMDGPUTargetID() = default;
163
164 /// \return True if the current xnack setting is not "Unsupported".
165 bool isXnackSupported() const {
166 return XnackSetting != TargetIDSetting::Unsupported;
167 }
168
169 /// \returns True if the current xnack setting is "On" or "Any".
170 bool isXnackOnOrAny() const {
171 return XnackSetting == TargetIDSetting::On ||
172 XnackSetting == TargetIDSetting::Any;
173 }
174
175 /// \returns True if current xnack setting is "On" or "Off",
176 /// false otherwise.
181
182 /// \returns The current xnack TargetIDSetting, possible options are
183 /// "Unsupported", "Any", "Off", and "On".
184 TargetIDSetting getXnackSetting() const { return XnackSetting; }
185
186 /// Sets xnack setting to \p NewXnackSetting.
187 void setXnackSetting(TargetIDSetting NewXnackSetting) {
188 XnackSetting = NewXnackSetting;
189 }
190
191 /// \return True if the current sramecc setting is not "Unsupported".
192 bool isSramEccSupported() const {
193 return SramEccSetting != TargetIDSetting::Unsupported;
194 }
195
196 /// \returns True if the current sramecc setting is "On" or "Any".
197 bool isSramEccOnOrAny() const {
198 return SramEccSetting == TargetIDSetting::On ||
199 SramEccSetting == TargetIDSetting::Any;
200 }
201
202 /// \returns True if current sramecc setting is "On" or "Off",
203 /// false otherwise.
208
209 /// \returns The current sramecc TargetIDSetting, possible options are
210 /// "Unsupported", "Any", "Off", and "On".
211 TargetIDSetting getSramEccSetting() const { return SramEccSetting; }
212
213 /// Sets sramecc setting to \p NewSramEccSetting.
214 void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
215 SramEccSetting = NewSramEccSetting;
216 }
217
220
221 /// Write string representation to \p OS
222 void print(raw_ostream &OS) const;
223
224 /// \returns String representation of an object.
225 std::string toString() const;
226};
227
229 const AMDGPUTargetID &TargetID) {
230 TargetID.print(OS);
231 return OS;
232}
233
234/// \returns Wavefront size for given subtarget \p STI.
235unsigned getWavefrontSize(const MCSubtargetInfo *STI);
236
237/// \returns Local memory size in bytes for given subtarget \p STI.
238unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
239
240/// \returns Maximum addressable local memory size in bytes for given subtarget
241/// \p STI.
243
244/// \returns Number of execution units per compute unit for given subtarget \p
245/// STI.
246unsigned getEUsPerCU(const MCSubtargetInfo *STI);
247
248/// \returns Maximum number of work groups per compute unit for given subtarget
249/// \p STI and limited by given \p FlatWorkGroupSize.
250unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
251 unsigned FlatWorkGroupSize);
252
253/// \returns Minimum number of waves per execution unit for given subtarget \p
254/// STI.
255unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
256
257/// \returns Maximum number of waves per execution unit for given subtarget \p
258/// STI without any kind of limitation.
259unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
260
261/// \returns Number of waves per execution unit required to support the given \p
262/// FlatWorkGroupSize.
264 unsigned FlatWorkGroupSize);
265
266/// \returns Minimum flat work group size for given subtarget \p STI.
267unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
268
/// \returns Maximum flat work group size. The function takes no subtarget, so
/// the limit is the same for all of them.
constexpr unsigned getMaxFlatWorkGroupSize() {
  // Some subtargets allow encoding 2048, but that is neither tested nor
  // supported, so the limit stays at 1024.
  constexpr unsigned MaxSupportedSize = 1024;
  return MaxSupportedSize;
}
274
275/// \returns Number of waves per work group for given subtarget \p STI and
276/// \p FlatWorkGroupSize.
277unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
278 unsigned FlatWorkGroupSize);
279
280/// \returns SGPR allocation granularity for given subtarget \p STI.
281unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
282
283/// \returns SGPR encoding granularity for given subtarget \p STI.
284unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
285
286/// \returns Total number of SGPRs for given subtarget \p STI.
287unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
288
289/// \returns Addressable number of SGPRs for given subtarget \p STI.
290unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
291
292/// \returns Minimum number of SGPRs that meets the given number of waves per
293/// execution unit requirement for given subtarget \p STI.
294unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
295
296/// \returns Maximum number of SGPRs that meets the given number of waves per
297/// execution unit requirement for given subtarget \p STI.
298unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
299 bool Addressable);
300
301/// \returns Number of extra SGPRs implicitly required by given subtarget \p
302/// STI when the given special registers are used.
303unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
304 bool FlatScrUsed, bool XNACKUsed);
305
306/// \returns Number of extra SGPRs implicitly required by given subtarget \p
307/// STI when the given special registers are used. XNACK is inferred from
308/// \p STI.
309unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
310 bool FlatScrUsed);
311
312/// \returns Number of SGPR blocks needed for given subtarget \p STI when
313/// \p NumSGPRs are used. \p NumSGPRs should already include any special
314/// register counts.
315unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
316
317/// \returns VGPR allocation granularity for given subtarget \p STI.
318///
319/// For subtargets which support it, \p EnableWavefrontSize32 should match
320/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
321unsigned
322getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize,
323 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
324
325/// \returns VGPR encoding granularity for given subtarget \p STI.
326///
327/// For subtargets which support it, \p EnableWavefrontSize32 should match
328/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
330 const MCSubtargetInfo *STI,
331 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
332
333/// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
334/// returns the allocation granule for ArchVGPRs.
335unsigned getArchVGPRAllocGranule();
336
337/// \returns Total number of VGPRs for given subtarget \p STI.
338unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
339
340/// \returns Addressable number of architectural VGPRs for a given subtarget \p
341/// STI.
343
344/// \returns Addressable number of VGPRs for given subtarget \p STI.
345unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
346 unsigned DynamicVGPRBlockSize);
347
348/// \returns Minimum number of VGPRs that meets given number of waves per
349/// execution unit requirement for given subtarget \p STI.
350unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
351 unsigned DynamicVGPRBlockSize);
352
353/// \returns Maximum number of VGPRs that meets given number of waves per
354/// execution unit requirement for given subtarget \p STI.
355unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
356 unsigned DynamicVGPRBlockSize);
357
358/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
359/// subtarget \p STI.
361 unsigned NumVGPRs,
362 unsigned DynamicVGPRBlockSize);
363
364/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
365/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
366unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
367 unsigned MaxWaves,
368 unsigned TotalNumVGPRs);
369
370/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
371/// Gen.
372unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
374
375/// \returns Number of VGPR blocks needed for given subtarget \p STI when
376/// \p NumVGPRs are used. We actually return the number of blocks -1, since
377/// that's what we encode.
378///
379/// For subtargets which support it, \p EnableWavefrontSize32 should match the
380/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
382 const MCSubtargetInfo *STI, unsigned NumVGPRs,
383 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
384
385/// \returns Number of VGPR blocks that need to be allocated for the given
386/// subtarget \p STI when \p NumVGPRs are used.
388 const MCSubtargetInfo *STI, unsigned NumVGPRs,
389 unsigned DynamicVGPRBlockSize,
390 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
391
392} // end namespace IsaInfo
393
394// Represents a field in an encoded value.
395template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
397 static_assert(HighBit >= LowBit, "Invalid bit range!");
398 static constexpr unsigned Offset = LowBit;
399 static constexpr unsigned Width = HighBit - LowBit + 1;
400
402 static constexpr ValueType Default = D;
403
406
407 constexpr uint64_t encode() const { return Value; }
408 static ValueType decode(uint64_t Encoded) { return Encoded; }
409};
410
411// Represents a single bit in an encoded value.
412template <unsigned Bit, unsigned D = 0>
414
// A helper for encoding and decoding multiple fields.
//
// Every type in \p Fields has to provide `Offset`, `Width`, `ValueType`,
// `encode()` and a static `decode()`; those are the members used below.
template <typename... Fields> struct EncodingFields {
  // Pack \p Values into a single word: the encoded value of every field is
  // shifted to that field's Offset and the results are OR-ed together.
  // An empty field list encodes to 0.
  static constexpr uint64_t encode(Fields... Values) {
    // The explicit initial operand keeps the fold well-formed for an empty
    // pack; a unary fold over `|` cannot be expanded in that case.
    return (uint64_t{0} | ... | (Values.encode() << Values.Offset));
  }

  // Split \p Encoded into one value per field: the word is shifted down by the
  // field's Offset, masked with maxUIntN(Width) and handed to the field's
  // decode().
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
426
428inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
429 return getNamedOperandIdx(Opcode, NamedIdx) != -1;
430}
431
434
455
458
460const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
461
471
473const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
474
477
480
482 MIMGBaseOpcode L;
483 MIMGBaseOpcode LZ;
484};
485
487 MIMGBaseOpcode MIP;
488 MIMGBaseOpcode NONMIP;
489};
490
492 MIMGBaseOpcode Bias;
493 MIMGBaseOpcode NoBias;
494};
495
497 MIMGBaseOpcode Offset;
498 MIMGBaseOpcode NoOffset;
499};
500
502 MIMGBaseOpcode G;
503 MIMGBaseOpcode G16;
504};
505
508
510 unsigned Opcode2Addr;
511 unsigned Opcode3Addr;
512};
513
516
519
522
525
527int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
528 unsigned VDataDwords, unsigned VAddrDwords);
529
531int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
532
534unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
535 const MIMGDimInfo *Dim, bool IsA16,
536 bool IsG16Supported);
537
546
548const MIMGInfo *getMIMGInfo(unsigned Opc);
549
551int getMTBUFBaseOpcode(unsigned Opc);
552
554int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
555
557int getMTBUFElements(unsigned Opc);
558
560bool getMTBUFHasVAddr(unsigned Opc);
561
563bool getMTBUFHasSrsrc(unsigned Opc);
564
566bool getMTBUFHasSoffset(unsigned Opc);
567
569int getMUBUFBaseOpcode(unsigned Opc);
570
572int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
573
575int getMUBUFElements(unsigned Opc);
576
578bool getMUBUFHasVAddr(unsigned Opc);
579
581bool getMUBUFHasSrsrc(unsigned Opc);
582
584bool getMUBUFHasSoffset(unsigned Opc);
585
587bool getMUBUFIsBufferInv(unsigned Opc);
588
590bool getMUBUFTfe(unsigned Opc);
591
593bool getSMEMIsBuffer(unsigned Opc);
594
596bool getVOP1IsSingle(unsigned Opc);
597
599bool getVOP2IsSingle(unsigned Opc);
600
602bool getVOP3IsSingle(unsigned Opc);
603
605bool isVOPC64DPP(unsigned Opc);
606
608bool isVOPCAsmOnly(unsigned Opc);
609
610/// Returns true if MAI operation is a double precision GEMM.
612bool getMAIIsDGEMM(unsigned Opc);
613
615bool getMAIIsGFX940XDL(unsigned Opc);
616
618bool getWMMAIsXDL(unsigned Opc);
619
620// Get an equivalent BitOp3 for a binary logical \p Opc.
621// \returns BitOp3 modifier for the logical operation or zero.
622// Used in VOPD3 conversion.
623unsigned getBitOp2(unsigned Opc);
624
625struct CanBeVOPD {
626 bool X;
627 bool Y;
628};
629
630/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
632unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
633
635CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);
636
638uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
639
642 unsigned BLGP,
643 unsigned F8F8Opcode);
644
647
650 unsigned FmtB,
651 unsigned F8F8Opcode);
652
655 uint8_t NumComponents,
656 uint8_t NumFormat,
657 const MCSubtargetInfo &STI);
660 const MCSubtargetInfo &STI);
661
663int32_t getMCOpcode(uint32_t Opcode, unsigned Gen);
664
666unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);
667
669int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
670 bool VOPD3);
671
673bool isVOPD(unsigned Opc);
674
676bool isMAC(unsigned Opc);
677
679bool isPermlane16(unsigned Opc);
680
682bool isGenericAtomic(unsigned Opc);
683
685bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
686
687namespace VOPD {
688
699
// LSB masks for VGPR banks, one entry per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
// `inline` gives each table a single definition shared by every translation
// unit that includes this header.
inline constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
inline constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};
704
705enum ComponentIndex : unsigned { X = 0, Y = 1 };
707constexpr unsigned COMPONENTS_NUM = 2;
708
709// Properties of VOPD components.
711private:
712 unsigned SrcOperandsNum = 0;
713 unsigned MandatoryLiteralIdx = ~0u;
714 bool HasSrc2Acc = false;
715 unsigned NumVOPD3Mods = 0;
716 unsigned Opcode = 0;
717 bool IsVOP3 = false;
718
719public:
720 ComponentProps() = default;
721 ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);
722
723 // Return the total number of src operands this component has.
724 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
725
726 // Return the number of src operands of this component visible to the parser.
728 return SrcOperandsNum - HasSrc2Acc;
729 }
730
731 // Return true iff this component has a mandatory literal.
732 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
733
734 // If this component has a mandatory literal, return component operand
735 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
738 return MandatoryLiteralIdx;
739 }
740
741 // Return true iif this component has operand
742 // with component index CompSrcIdx and this operand may be a register.
743 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
744 assert(CompSrcIdx < Component::MAX_SRC_NUM);
745 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
746 }
747
748 // Return true iff this component has tied src2.
749 bool hasSrc2Acc() const { return HasSrc2Acc; }
750
751 // Return a number of source modifiers if instruction is used in VOPD3.
752 unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }
753
754 // Return opcode of the component.
755 unsigned getOpcode() const { return Opcode; }
756
757 // Returns if component opcode is in VOP3 encoding.
758 unsigned isVOP3() const { return IsVOP3; }
759
760 // Return index of BitOp3 operand or -1.
761 int getBitOp3OperandIdx() const;
762
763private:
764 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
765 assert(CompSrcIdx < Component::MAX_SRC_NUM);
766 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
767 }
768};
769
770enum ComponentKind : unsigned {
771 SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD.
772 COMPONENT_X, // A VOPD instruction, X component.
773 COMPONENT_Y, // A VOPD instruction, Y component.
775};
776
777// Interface functions of this class map VOPD component operand indices
778// to indices of operands in MachineInstr/MCInst or parsed operands array.
779//
780// Note that this class operates with 3 kinds of indices:
781// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
782// - MC operand indices (they refer to operands in a MachineInstr/MCInst);
783// - parsed operand indices (they refer to operands in parsed operands array).
784//
785// For SINGLE components mapping between these indices is trivial.
786// But things get more complicated for COMPONENT_X and
787// COMPONENT_Y because these components share the same
788// MachineInstr/MCInst and the same parsed operands array.
789// Below is an example of component operand to parsed operand
790// mapping for the following instruction:
791//
792// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
793//
794// PARSED COMPONENT PARSED
795// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
796// -------------------------------------------------------------------
797// "v_dual_add_f32" 0
798// v_dual_add_f32 v255 0 (DST) --> 1
799// v4 1 (SRC0) --> 2
800// v5 2 (SRC1) --> 3
801// "::" 4
802// "v_dual_mov_b32" 5
803// v_dual_mov_b32 v6 0 (DST) --> 6
804// v1 1 (SRC0) --> 7
805// -------------------------------------------------------------------
806//
808private:
809 // Regular MachineInstr/MCInst operands are ordered as follows:
810 // dst, src0 [, other src operands]
811 // VOPD MachineInstr/MCInst operands are ordered as follows:
812 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
813 // Each ComponentKind has operand indices defined below.
814 static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
815
816 // VOPD3 instructions may have 2 or 3 source modifiers; the src2 modifier is
817 // not used if there is a tied accumulator. Indexing of this array:
818 // SINGLE_MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE
819 // instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second
820 // component add OpX.MCSrcNum + OpX.VOPD3ModsNum.
821 // For VOPD1/VOPD2 use the row with zero modifiers.
822 static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
823 {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}};
824
825 // Parsed operands of regular instructions are ordered as follows:
826 // Mnemo dst src0 [vsrc1 ...]
827 // Parsed VOPD operands are ordered as follows:
828 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
829 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
830 // Each ComponentKind has operand indices defined below.
831 static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
832 4 /* + OpX.ParsedSrcNum */};
833 static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
834 2, 2, 5 /* + OpX.ParsedSrcNum */};
835
836private:
837 const ComponentKind Kind;
838 const ComponentProps PrevComp;
839 const unsigned VOPD3ModsNum;
840 const int BitOp3Idx; // Index of bitop3 operand or -1
841
842public:
843 // Create layout for COMPONENT_X or SINGLE component.
844 ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
845 : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
847 }
848
849 // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
850 ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
851 int BitOp3Idx)
852 : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
853 VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}
854
855public:
856 // Return the index of dst operand in MCInst operands.
857 unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
858
859 // Return the index of the specified src operand in MCInst operands.
860 unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
861 assert(CompSrcIdx < Component::MAX_SRC_NUM);
862
863 if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
864 return BitOp3Idx;
865
866 if (VOPD3) {
867 return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
868 getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
869 }
870
871 return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
872 (Kind != SINGLE ? 1 : 0);
873 }
874
875 // Return the index of dst operand in the parsed operands array.
877 return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
878 }
879
880 // Return the index of the specified src operand in the parsed operands array.
881 unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
882 assert(CompSrcIdx < Component::MAX_SRC_NUM);
883 return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
884 }
885
886private:
887 unsigned getPrevCompSrcNum() const {
888 return PrevComp.getCompSrcOperandsNum();
889 }
890 unsigned getPrevCompParsedSrcNum() const {
891 return PrevComp.getCompParsedSrcOperandsNum();
892 }
893 unsigned getPrevCompVOPD3ModsNum() const {
894 return PrevComp.getCompVOPD3ModsNum();
895 }
896};
897
898// Layout and properties of VOPD components.
900public:
901 // Create ComponentInfo for COMPONENT_X or SINGLE component.
904 bool VOP3Layout = false)
905 : ComponentProps(OpDesc, VOP3Layout),
907
908 // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
909 ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
910 bool VOP3Layout = false)
911 : ComponentProps(OpDesc, VOP3Layout),
914
915 // Map component operand index to parsed operand index.
916 // Return 0 if the specified operand does not exist.
917 unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
918};
919
920// Properties of VOPD instructions.
921class InstInfo {
922private:
923 const ComponentInfo CompInfo[COMPONENTS_NUM];
924
925public:
926 using RegIndices = std::array<MCRegister, Component::MAX_OPR_NUM>;
927
928 InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
929 : CompInfo{OpX, OpY} {}
930
931 InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
932 : CompInfo{OprInfoX, OprInfoY} {}
933
934 const ComponentInfo &operator[](size_t ComponentIdx) const {
935 assert(ComponentIdx < COMPONENTS_NUM);
936 return CompInfo[ComponentIdx];
937 }
938
939 // Check VOPD operands constraints.
940 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
941 // for the specified component and MC operand. The callback must return 0
942 // if the operand is not a register or not a VGPR.
943 // If \p SkipSrc is set to true then constraints for source operands are not
944 // checked.
945 // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
946 // even though it violates requirement to be from different banks.
947 // If \p VOPD3 is set to true both dst registers allowed to be either odd
948 // or even and instruction may have real src2 as opposed to tied accumulator.
949 bool
950 hasInvalidOperand(std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
951 const MCRegisterInfo &MRI, bool SkipSrc = false,
952 bool AllowSameVGPR = false, bool VOPD3 = false) const {
953 return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
954 VOPD3)
955 .has_value();
956 }
957
958 // Check VOPD operands constraints.
959 // Return the index of an invalid component operand, if any.
960 // If \p SkipSrc is set to true then constraints for source operands are not
961 // checked except for being from the same halves of VGPR file on gfx1250.
962 // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
963 // even though it violates requirement to be from different banks.
964 // If \p VOPD3 is set to true both dst registers are allowed to be either odd
965 // or even and the instruction may have a real src2 as opposed to a tied accumulator.
966 std::optional<unsigned> getInvalidCompOperandIndex(
967 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
968 const MCRegisterInfo &MRI, bool SkipSrc = false,
969 bool AllowSameVGPR = false, bool VOPD3 = false) const;
970
971private:
973 getRegIndices(unsigned ComponentIdx,
974 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
975 bool VOPD3) const;
976};
977
978} // namespace VOPD
979
981std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
982
984// Get properties of 2 single VOP1/VOP2 instructions
985// used as components to create a VOPD instruction.
986VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
987
989// Get properties of VOPD X and Y components.
990VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
991 const MCInstrInfo *InstrInfo);
992
994bool isAsyncStore(unsigned Opc);
996bool isTensorStore(unsigned Opc);
998unsigned getTemporalHintType(const MCInstrDesc TID);
999
1001bool isTrue16Inst(unsigned Opc);
1002
1004FPType getFPDstSelType(unsigned Opc);
1005
1008
1011
1012bool isDPMACCInstruction(unsigned Opc);
1013
1015unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
1016
1018unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
1019
1021 const MCSubtargetInfo *STI);
1022
1023bool isGroupSegment(const GlobalValue *GV);
1024bool isGlobalSegment(const GlobalValue *GV);
1025bool isReadOnlySegment(const GlobalValue *GV);
1026
1027/// \returns True if constants should be emitted to .text section for given
1028/// target triple \p TT, false otherwise.
1030
1031/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1032/// register name. Followed by the start register number, and the register
1033/// width. Does not validate that the number of registers exists in the class.
1034/// Unlike parseAsmConstraintPhysReg, this does not expect the name to be
1035/// wrapped in "{}".
1036std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);
1037
1038/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1039/// register constraint. Followed by the start register number, and the register
1040/// width. Does not validate that the number of registers exists in the class.
1041std::tuple<char, unsigned, unsigned>
1043
1044/// \returns Integer value requested using \p F's \p Name attribute.
1045///
1046/// \returns \p Default if attribute is not present.
1047///
1048/// \returns \p Default and emits error if requested value cannot be converted
1049/// to integer.
1051
1052/// \returns A pair of integer values requested using \p F's \p Name attribute
1053/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1054/// is false).
1055///
1056/// \returns \p Default if attribute is not present.
1057///
1058/// \returns \p Default and emits error if one of the requested values cannot be
1059/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
1060/// not present.
1061std::pair<unsigned, unsigned>
1063 std::pair<unsigned, unsigned> Default,
1064 bool OnlyFirstRequired = false);
1065
1066/// \returns A pair of integer values requested using \p F's \p Name attribute
1067/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1068/// is false).
1069///
1070/// \returns \p std::nullopt if attribute is not present.
1071///
1072/// \returns \p std::nullopt and emits error if one of the requested values
1073/// cannot be converted to integer, or \p OnlyFirstRequired is false and
1074/// "second" value is not present.
1075std::optional<std::pair<unsigned, std::optional<unsigned>>>
1077 bool OnlyFirstRequired = false);
1078
1079/// \returns A vector of integer values requested using \p F's \p Name
1080/// attribute.
1081/// \returns A vector of size \p Size, with all elements set to \p DefaultVal,
1082/// if any error occurs. The corresponding error will also be emitted.
1084 unsigned Size,
1085 unsigned DefaultVal);
1086/// Similar to the function above, but returns std::nullopt if any error occurs.
1087std::optional<SmallVector<unsigned>>
1088getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
1089
1090/// Checks if \p Val is inside \p MD, a !range-like metadata.
1091bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
1092
1094 LOAD_CNT = 0, // VMcnt prior to gfx12.
1095 DS_CNT, // LGKMcnt prior to gfx12.
1097 STORE_CNT, // VScnt in gfx10/gfx11.
1100 BVH_CNT, // gfx12+ only.
1101 KM_CNT, // gfx12+ only.
1102 X_CNT, // gfx1250.
1104 VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
1105 VM_VSRC, // gfx12+ expert mode only.
1108};
1109
1110// Return an iterator over all counters between LOAD_CNT (the first counter)
1111// and \c MaxCounter (exclusive, default value yields an enumeration over
1112// all counters).
1115
1116} // namespace AMDGPU
1117
1118template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
1119 static constexpr bool is_iterable = true;
1120};
1121
1122namespace AMDGPU {
1123
1124/// Represents the counter values to wait for in an s_waitcnt instruction.
1125///
1126/// Large values (including the maximum possible integer) can be used to
1127/// represent "don't care" waits.
///
/// Every counter field is initialized to ~0u, and the hasWait*() predicates
/// below treat exactly that value as "no wait requested" for the counter.
1128class Waitcnt {
1129 unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
1130 unsigned ExpCnt = ~0u;
1131 unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12.
1132 unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11.
1133 unsigned SampleCnt = ~0u; // gfx12+ only.
1134 unsigned BvhCnt = ~0u; // gfx12+ only.
1135 unsigned KmCnt = ~0u; // gfx12+ only.
1136 unsigned XCnt = ~0u; // gfx1250.
1137 unsigned VaVdst = ~0u; // gfx12+ expert scheduling mode only.
1138 unsigned VmVsrc = ~0u; // gfx12+ expert scheduling mode only.
1139
1140public:
 // Returns the stored value of the field that corresponds to counter T.
 // Any InstCounterType value without a case below hits llvm_unreachable.
1141 unsigned get(InstCounterType T) const {
1142 switch (T) {
1143 case LOAD_CNT:
1144 return LoadCnt;
1145 case EXP_CNT:
1146 return ExpCnt;
1147 case DS_CNT:
1148 return DsCnt;
1149 case STORE_CNT:
1150 return StoreCnt;
1151 case SAMPLE_CNT:
1152 return SampleCnt;
1153 case BVH_CNT:
1154 return BvhCnt;
1155 case KM_CNT:
1156 return KmCnt;
1157 case X_CNT:
1158 return XCnt;
1159 case VA_VDST:
1160 return VaVdst;
1161 case VM_VSRC:
1162 return VmVsrc;
1163 default:
1164 llvm_unreachable("bad InstCounterType");
1165 }
1166 }
 // Overwrites the field that corresponds to counter T with Val. Mirrors
 // get(): the same set of counters is handled, anything else is unreachable.
1167 void set(InstCounterType T, unsigned Val) {
1168 switch (T) {
1169 case LOAD_CNT:
1170 LoadCnt = Val;
1171 break;
1172 case EXP_CNT:
1173 ExpCnt = Val;
1174 break;
1175 case DS_CNT:
1176 DsCnt = Val;
1177 break;
1178 case STORE_CNT:
1179 StoreCnt = Val;
1180 break;
1181 case SAMPLE_CNT:
1182 SampleCnt = Val;
1183 break;
1184 case BVH_CNT:
1185 BvhCnt = Val;
1186 break;
1187 case KM_CNT:
1188 KmCnt = Val;
1189 break;
1190 case X_CNT:
1191 XCnt = Val;
1192 break;
1193 case VA_VDST:
1194 VaVdst = Val;
1195 break;
1196 case VM_VSRC:
1197 VmVsrc = Val;
1198 break;
1199 default:
1200 llvm_unreachable("bad InstCounterType");
1201 }
1202 }
1203
 // Default state: every counter keeps its ~0u initializer.
1204 Waitcnt() = default;
1205 // Pre-gfx12 constructor.
 // Sets only LoadCnt, ExpCnt, DsCnt and StoreCnt; the remaining counters
 // keep their ~0u default.
1206 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
1207 : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
1208
1209 // gfx12+ constructor.
 // Sets all ten counters explicitly.
1210 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
1211 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
1212 unsigned VaVdst, unsigned VmVsrc)
1213 : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
1214 SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt),
1215 VaVdst(VaVdst), VmVsrc(VmVsrc) {}
1216
 // True if at least one counter (StoreCnt included) differs from ~0u.
1217 bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
1218
 // NOTE(review): the signature line of this body is absent from this
 // listing; presumably it is `bool hasWaitExceptStoreCnt() const {`, the
 // helper called by hasWait() above. The body is true if any counter other
 // than StoreCnt differs from ~0u.
1220 return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
1221 SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u ||
1222 VaVdst != ~0u || VmVsrc != ~0u;
1223 }
1224
 // True if StoreCnt differs from ~0u.
1225 bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
1226
 // True if VaVdst or VmVsrc differs from ~0u.
1227 bool hasWaitDepctr() const { return VaVdst != ~0u || VmVsrc != ~0u; }
1228
 // NOTE(review): the signature line of this body is absent from this
 // listing; presumably it is `Waitcnt combined(const Waitcnt &Other) const {`.
 // The body builds a new Waitcnt whose every counter is the minimum of this
 // object's value and Other's value for that counter.
1230 // Does the right thing provided self and Other are either both pre-gfx12
1231 // or both gfx12+.
1232 return Waitcnt(
1233 std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
1234 std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
1235 std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
1236 std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt),
1237 std::min(VaVdst, Other.VaVdst), std::min(VmVsrc, Other.VmVsrc));
1238 }
1239
1241};
1242
1243/// Represents the hardware counter limits for different wait count types.
1245 unsigned LoadcntMax; // Corresponds to Vmcnt prior to gfx12.
1246 unsigned ExpcntMax;
1247 unsigned DscntMax; // Corresponds to LGKMcnt prior to gfx12.
1248 unsigned StorecntMax; // Corresponds to VScnt in gfx10/gfx11.
1249 unsigned SamplecntMax; // gfx12+ only.
1250 unsigned BvhcntMax; // gfx12+ only.
1251 unsigned KmcntMax; // gfx12+ only.
1252 unsigned XcntMax; // gfx1250.
1253 unsigned VaVdstMax; // gfx12+ expert mode only.
1254 unsigned VmVsrcMax; // gfx12+ expert mode only.
1255
1256 HardwareLimits() = default;
1257
1258 /// Initializes hardware limits from ISA version.
1260};
1261
1262// The following methods are only meaningful on targets that support
1263// S_WAITCNT.
1264
1265/// \returns Vmcnt bit mask for given isa \p Version.
1266unsigned getVmcntBitMask(const IsaVersion &Version);
1267
1268/// \returns Expcnt bit mask for given isa \p Version.
1269unsigned getExpcntBitMask(const IsaVersion &Version);
1270
1271/// \returns Lgkmcnt bit mask for given isa \p Version.
1272unsigned getLgkmcntBitMask(const IsaVersion &Version);
1273
1274/// \returns Waitcnt bit mask for given isa \p Version.
1275unsigned getWaitcntBitMask(const IsaVersion &Version);
1276
1277/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
1278unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
1279
1280/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
1281unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
1282
1283/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
1284unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
1285
1286/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
1287/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
1288/// \p Lgkmcnt respectively. Should not be used on gfx12+; the instruction
1289/// which needs it is deprecated.
1290///
1291/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
1292/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
1293/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
1294/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
1295/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
1296/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
1297/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
1298/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
1299/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
1300///
1301void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1302 unsigned &Expcnt, unsigned &Lgkmcnt);
1303
1304Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1305
1306/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1307unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1308 unsigned Vmcnt);
1309
1310/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1311unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1312 unsigned Expcnt);
1313
1314/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1315unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1316 unsigned Lgkmcnt);
1317
1318/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
1319/// \p Version. Should not be used on gfx12+; the instruction which needs
1320/// it is deprecated.
1321///
1322/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1323/// Waitcnt[2:0] = \p Expcnt (gfx11+)
1324/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
1325/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
1326/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
1327/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
1328/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
1329/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
1330/// Waitcnt[15:10] = \p Vmcnt (gfx11)
1331/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
1332///
1333/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1334/// isa \p Version.
1335///
1336unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1337 unsigned Expcnt, unsigned Lgkmcnt);
1338
1339unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1340
1341// The following methods are only meaningful on targets that support
1342// S_WAIT_*CNT, introduced with gfx12.
1343
1344/// \returns Loadcnt bit mask for given isa \p Version.
1345/// Returns 0 for versions that do not support LOADcnt
1346unsigned getLoadcntBitMask(const IsaVersion &Version);
1347
1348/// \returns Samplecnt bit mask for given isa \p Version.
1349/// Returns 0 for versions that do not support SAMPLEcnt
1350unsigned getSamplecntBitMask(const IsaVersion &Version);
1351
1352/// \returns Bvhcnt bit mask for given isa \p Version.
1353/// Returns 0 for versions that do not support BVHcnt
1354unsigned getBvhcntBitMask(const IsaVersion &Version);
1355
1356/// \returns Dscnt bit mask for given isa \p Version.
1357/// Returns 0 for versions that do not support DScnt
1358unsigned getDscntBitMask(const IsaVersion &Version);
1359
1360/// \returns Kmcnt bit mask for given isa \p Version.
1361/// Returns 0 for versions that do not support KMcnt
1362unsigned getKmcntBitMask(const IsaVersion &Version);
1363
1364/// \returns Xcnt bit mask for given isa \p Version.
1365/// Returns 0 for versions that do not support Xcnt.
1366unsigned getXcntBitMask(const IsaVersion &Version);
1367
1368/// \return STOREcnt or VScnt bit mask for given isa \p Version.
1369/// returns 0 for versions that do not support STOREcnt or VScnt.
1370/// STOREcnt and VScnt are the same counter, the name used
1371/// depends on the ISA version.
1372unsigned getStorecntBitMask(const IsaVersion &Version);
1373
1374// The following are only meaningful on targets that support
1375// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1376
1377/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1378/// isa \p Version.
1379Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1380
1381/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1382/// isa \p Version.
1383Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1384
1385/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
1386/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1387/// \p Version.
1388unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1389
1390/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
1391/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1392/// \p Version.
1393unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1394
1395namespace Hwreg {
1396
1399
/// Encoding field whose stored form is biased by one: encode() yields
/// Value - 1 and decode() adds the 1 back to the encoded bits.
1400struct HwregSize : EncodingField<15, 11, 32> {
1402 constexpr uint64_t encode() const { return Value - 1; }
1403 static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
1404};
1405
1407
1408} // namespace Hwreg
1409
1410namespace DepCtr {
1411
1413int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1414 const MCSubtargetInfo &STI);
1415bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1416 const MCSubtargetInfo &STI);
1417bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1418 bool &IsDefault, const MCSubtargetInfo &STI);
1419
1420/// \returns Maximum VaVdst value that can be encoded.
1421unsigned getVaVdstBitMask();
1422
1423/// \returns Maximum VaSdst value that can be encoded.
1424unsigned getVaSdstBitMask();
1425
1426/// \returns Maximum VaSsrc value that can be encoded.
1427unsigned getVaSsrcBitMask();
1428
1429/// \returns Maximum HoldCnt value that can be encoded.
1430unsigned getHoldCntBitMask(const IsaVersion &Version);
1431
1432/// \returns Maximum VmVsrc value that can be encoded.
1433unsigned getVmVsrcBitMask();
1434
1435/// \returns Maximum VaVcc value that can be encoded.
1436unsigned getVaVccBitMask();
1437
1438/// \returns Maximum SaSdst value that can be encoded.
1439unsigned getSaSdstBitMask();
1440
1441/// \returns Decoded VaVdst from given immediate \p Encoded.
1442unsigned decodeFieldVaVdst(unsigned Encoded);
1443
1444/// \returns Decoded VmVsrc from given immediate \p Encoded.
1445unsigned decodeFieldVmVsrc(unsigned Encoded);
1446
1447/// \returns Decoded SaSdst from given immediate \p Encoded.
1448unsigned decodeFieldSaSdst(unsigned Encoded);
1449
1450/// \returns Decoded VaSdst from given immediate \p Encoded.
1451unsigned decodeFieldVaSdst(unsigned Encoded);
1452
1453/// \returns Decoded VaVcc from given immediate \p Encoded.
1454unsigned decodeFieldVaVcc(unsigned Encoded);
1455
1456/// \returns Decoded VaSsrc from given immediate \p Encoded.
1457unsigned decodeFieldVaSsrc(unsigned Encoded);
1458
1459/// \returns Decoded HoldCnt from given immediate \p Encoded.
1460unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version);
1461
1462/// \returns \p VmVsrc as an encoded Depctr immediate.
1463unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI);
1464
1465/// \returns \p Encoded combined with encoded \p VmVsrc.
1466unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1467
1468/// \returns \p VaVdst as an encoded Depctr immediate.
1469unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI);
1470
1471/// \returns \p Encoded combined with encoded \p VaVdst.
1472unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1473
1474/// \returns \p SaSdst as an encoded Depctr immediate.
1475unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI);
1476
1477/// \returns \p Encoded combined with encoded \p SaSdst.
1478unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1479
1480/// \returns \p VaSdst as an encoded Depctr immediate.
1481unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI);
1482
1483/// \returns \p Encoded combined with encoded \p VaSdst.
1484unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);
1485
1486/// \returns \p VaVcc as an encoded Depctr immediate.
1487unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI);
1488
1489/// \returns \p Encoded combined with encoded \p VaVcc.
1490unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
1491
1492/// \returns \p HoldCnt as an encoded Depctr immediate.
1493unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI);
1494
1495/// \returns \p Encoded combined with encoded \p HoldCnt.
1496unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
1497 const IsaVersion &Version);
1498
1499/// \returns \p VaSsrc as an encoded Depctr immediate.
1500unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI);
1501
1502/// \returns \p Encoded combined with encoded \p VaSsrc.
1503unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
1504
1505} // namespace DepCtr
1506
1507namespace Exp {
1508
1509bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1510
1512unsigned getTgtId(const StringRef Name);
1513
1515bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1516
1517} // namespace Exp
1518
1519namespace MTBUFFormat {
1520
1522int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1523
1524void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1525
1526int64_t getDfmt(const StringRef Name);
1527
1528StringRef getDfmtName(unsigned Id);
1529
1530int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1531
1532StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1533
1534bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1535
1536bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1537
1538int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1539
1540StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1541
1542bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1543
1544int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1545 const MCSubtargetInfo &STI);
1546
1547bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1548
1549unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1550
1551} // namespace MTBUFFormat
1552
1553namespace SendMsg {
1554
1556bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1557
1559bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1560 bool Strict = true);
1561
1563bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1564 const MCSubtargetInfo &STI, bool Strict = true);
1565
1567bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1568
1570bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1571
1572void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1573 uint16_t &StreamId, const MCSubtargetInfo &STI);
1574
1576uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
1577
1578/// Returns true if the message does not use the m0 operand.
1579bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI);
1580
1581} // namespace SendMsg
1582
1583unsigned getInitialPSInputAddr(const Function &F);
1584
1585bool getHasColorExport(const Function &F);
1586
1587bool getHasDepthExport(const Function &F);
1588
1590
1591// Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
1592// the attribute is missing or its value is invalid.
1593unsigned getDynamicVGPRBlockSize(const Function &F);
1594
1596constexpr bool isShader(CallingConv::ID CC) {
1597 switch (CC) {
1607 return true;
1608 default:
1609 return false;
1610 }
1611}
1612
1614constexpr bool isGraphics(CallingConv::ID CC) {
1615 return isShader(CC) || CC == CallingConv::AMDGPU_Gfx ||
1617}
1618
/// \returns true if \p CC is not classified as graphics by isGraphics(), or
/// if \p CC is CallingConv::AMDGPU_CS.
1620constexpr bool isCompute(CallingConv::ID CC) {
1621 return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS;
1622}
1623
1626 switch (CC) {
1636 return true;
1637 default:
1638 return false;
1639 }
1640}
1641
1643constexpr bool isChainCC(CallingConv::ID CC) {
1644 switch (CC) {
1647 return true;
1648 default:
1649 return false;
1650 }
1651}
1652
1653// These functions are considered entrypoints into the current module, i.e. they
1654// are allowed to be called from outside the current module. This is different
1655// from isEntryFunctionCC, which is only true for functions that are entered by
1656// the hardware. Module entry points include all entry functions but also
1657// include functions that can be called from other functions inside or outside
1658// the current module. Module entry functions are allowed to allocate LDS.
1659//
1660// AMDGPU_CS_Chain is intended for externally callable chain functions, so it is
1661// treated as a module entrypoint. AMDGPU_CS_ChainPreserve is used for internal
1662// helper functions (e.g. retry helpers), so it is not a module entrypoint.
1665 switch (CC) {
1668 return true;
1669 default:
1670 return isEntryFunctionCC(CC);
1671 }
1672}
1673
1675constexpr inline bool isKernel(CallingConv::ID CC) {
1676 switch (CC) {
1679 return true;
1680 default:
1681 return false;
1682 }
1683}
1684
/// Convenience overload: applies isKernel() to the calling convention of \p F.
1685inline bool isKernel(const Function &F) { return isKernel(F.getCallingConv()); }
1686
1689 return CC == CallingConv::Fast;
1690}
1691
1692/// Return true if we might ever do TCO for calls with this calling convention.
1695 switch (CC) {
1696 case CallingConv::C:
1699 return true;
1700 default:
1701 return canGuaranteeTCO(CC);
1702 }
1703}
1704
1705bool hasXNACK(const MCSubtargetInfo &STI);
1706bool hasSRAMECC(const MCSubtargetInfo &STI);
1707bool hasMIMG_R128(const MCSubtargetInfo &STI);
1708bool hasA16(const MCSubtargetInfo &STI);
1709bool hasG16(const MCSubtargetInfo &STI);
1710bool hasPackedD16(const MCSubtargetInfo &STI);
1711bool hasGDS(const MCSubtargetInfo &STI);
1712unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1713unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1714
1715bool isSI(const MCSubtargetInfo &STI);
1716bool isCI(const MCSubtargetInfo &STI);
1717bool isVI(const MCSubtargetInfo &STI);
1718bool isGFX9(const MCSubtargetInfo &STI);
1719bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1720bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1721bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1722bool isGFX8Plus(const MCSubtargetInfo &STI);
1723bool isGFX9Plus(const MCSubtargetInfo &STI);
1724bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1725bool isGFX10(const MCSubtargetInfo &STI);
1726bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1727bool isGFX10Plus(const MCSubtargetInfo &STI);
1728bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1729bool isGFX10Before1030(const MCSubtargetInfo &STI);
1730bool isGFX11(const MCSubtargetInfo &STI);
1731bool isGFX11Plus(const MCSubtargetInfo &STI);
1732bool isGFX12(const MCSubtargetInfo &STI);
1733bool isGFX12Plus(const MCSubtargetInfo &STI);
1734bool isGFX1250(const MCSubtargetInfo &STI);
1735bool isGFX1250Plus(const MCSubtargetInfo &STI);
1736bool isGFX13(const MCSubtargetInfo &STI);
1737bool isGFX13Plus(const MCSubtargetInfo &STI);
1738bool supportsWGP(const MCSubtargetInfo &STI);
1739bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1740bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1741bool isGCN3Encoding(const MCSubtargetInfo &STI);
1742bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1743bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1744bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1745bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1746bool isGFX90A(const MCSubtargetInfo &STI);
1747bool isGFX940(const MCSubtargetInfo &STI);
1749bool hasMAIInsts(const MCSubtargetInfo &STI);
1750bool hasVOPD(const MCSubtargetInfo &STI);
1751bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1752
/// \returns true if isGFX10Plus() holds for \p STI and isGFX1250() does not.
1753inline bool supportsWave32(const MCSubtargetInfo &STI) {
1754 return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
1755}
1756
1757int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1758unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1760
1761/// Is Reg - scalar register
1762bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);
1763
1764/// \returns true if \p Reg occupies the high 16-bits of a 32-bit register.
1765bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);
1766
1767/// If \p Reg is a pseudo reg, return the correct hardware register given
1768/// \p STI otherwise return \p Reg.
1770
1771/// Convert hardware register \p Reg to a pseudo register
1774
1777
1778/// Is this an AMDGPU specific source operand? These include registers,
1779/// inline constants, literals and mandatory literals (KImm).
1780constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo) {
1781 return OpInfo.OperandType >= AMDGPU::OPERAND_SRC_FIRST &&
1782 OpInfo.OperandType <= AMDGPU::OPERAND_SRC_LAST;
1783}
1784
/// Convenience overload: applies the operand-info check to operand number
/// \p OpNo of \p Desc. \p OpNo indexes directly into Desc.operands(); no
/// bounds check is performed here.
1785inline bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1786 return isSISrcOperand(Desc.operands()[OpNo]);
1787}
1788
1789/// Is this a KImm operand?
1790bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1791
1792/// Is this floating-point operand?
1793bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1794
1795/// Does this operand support only inlinable literals?
1796bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1797
1798/// Get the size in bits of a register from the register class ID \p RCID.
1799unsigned getRegBitWidth(unsigned RCID);
1800
1801/// Get the size in bits of a register from the register class \p RC.
1802unsigned getRegBitWidth(const MCRegisterClass &RC);
1803
1805inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1806 switch (OpInfo.OperandType) {
1816 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1818 return 4;
1819
1826 return 8;
1827
1842 return 2;
1843
1844 default:
1845 llvm_unreachable("unhandled operand type");
1846 }
1847}
1848
/// Convenience overload: returns getOperandSize() for operand number \p OpNo
/// of \p Desc. \p OpNo indexes directly into Desc.operands(); no bounds check
/// is performed here.
1850inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1851 return getOperandSize(Desc.operands()[OpNo]);
1852}
1853
1854/// Is this literal inlinable, and not one of the values intended for floating
1855/// point values.
///
/// \returns true if \p Literal lies in the range [-16, 64], both ends
/// inclusive.
1857inline bool isInlinableIntLiteral(int64_t Literal) {
1858 return Literal >= -16 && Literal <= 64;
1859}
1860
1861/// Is this literal inlinable
1863bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1864
1866bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1867
1869bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1870
1872bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1873
1875bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1876
1878std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1879
1881std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
1882
1884std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1885
1887std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
1888 bool IsGFX11Plus);
1889
1892
1895
1898
1901
1903bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus);
1904
1906bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1907
1909int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit);
1910
1911bool isArgPassedInSGPR(const Argument *Arg);
1912
1913bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1914
1915LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);
1916
1919 int64_t EncodedOffset);
1920
1923 int64_t EncodedOffset, bool IsBuffer);
1924
1925/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1926/// offsets.
1928
1929/// \returns The encoding that will be used for \p ByteOffset in the
1930/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
1931/// S_LOAD instructions have a signed offset, on other subtargets it is
1932/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
1933std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1934 int64_t ByteOffset, bool IsBuffer,
1935 bool HasSOffset = false);
1936
1937/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
1938/// instruction. This is only useful on CI.
1939std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1940 int64_t ByteOffset);
1941
1942/// For pre-GFX12 FLAT instructions the offset must be positive;
1943/// MSB is ignored and forced to zero.
1944///
1945/// \return The number of bits available for the signed offset field in flat
1946/// instructions. Note that some forms of the instruction disallow negative
1947/// offsets.
1948unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1949
1950/// \returns true if this offset is small enough to fit in the SMRD
1951/// offset field. \p ByteOffset should be the offset in bytes and
1952/// not the encoded offset.
1953bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1954
/// \returns true if DPP control value \p DC is accepted for subtarget \p ST.
///
/// The subtarget checks are ordered: if isGFX12() holds, \p DC must lie in
/// [DPP::ROW_SHARE_FIRST, DPP::ROW_SHARE_LAST]; otherwise, if isGFX90A()
/// holds, \p DC must lie in [DPP::ROW_NEWBCAST_FIRST, DPP::ROW_NEWBCAST_LAST].
/// For every other subtarget the result is false.
1956inline bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC) {
1957 if (isGFX12(ST))
1958 return DC >= DPP::ROW_SHARE_FIRST && DC <= DPP::ROW_SHARE_LAST;
1959 if (isGFX90A(ST))
1960 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1961 return false;
1962}
1963
1964/// \returns true if an instruction may have a 64-bit VGPR operand.
1966 const MCSubtargetInfo &ST);
1967
1968/// \returns true if an instruction is a DP ALU DPP without any 64-bit operands.
1969bool isDPALU_DPP32BitOpc(unsigned Opc);
1970
1971/// \returns true if an instruction is a DP ALU DPP.
1972bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
1973 const MCSubtargetInfo &ST);
1974
1975/// \returns true if the intrinsic is divergent
1976bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1977
1978/// \returns true if the intrinsic is uniform
1979bool isIntrinsicAlwaysUniform(unsigned IntrID);
1980
1981/// \returns a register class for the physical register \p Reg if it is a VGPR
1982/// or nullptr otherwise.
1984 const MCRegisterInfo &MRI);
1985
1986/// \returns the MODE bits which have to be set by the S_SET_VGPR_MSB for the
1987/// physical register \p Reg.
1988unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI);
1989
1990/// If \p Reg is a low VGPR return a corresponding high VGPR with \p MSBs set.
1992 const MCRegisterInfo &MRI);
1993
1994/// \returns VGPR MSBs encoded in a S_SETREG_IMM32_B32 \p MI if it sets
1995/// it. If \p HasSetregVGPRMSBFixup is true then size of the ID_MODE mask is
1996/// ignored.
1997std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
1998 bool HasSetregVGPRMSBFixup);
1999
2000/// \returns VGPR MSBs encoded in a S_SETREG_IMM32_B32 \p MI if it sets
2001/// it. If \p HasSetregVGPRMSBFixup is true then size of the ID_MODE mask is
2002/// ignored.
2003std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
2004 bool HasSetregVGPRMSBFixup);
2005
2006// Returns a table for the opcode with a given \p Desc to map the VGPR MSB
2007// set by the S_SET_VGPR_MSB to one of 4 sources. In case of VOPD returns 2
2008// maps, one for X and one for Y component.
2009std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
2011
2012/// \returns true if a memory instruction supports scale_offset modifier.
2013bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
2014
2015/// \returns lds block size in terms of dwords.
2016/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
2017/// must be defined in terms of bytes.
2018unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
2019
2021public:
2023
2024 ClusterDimsAttr() = default;
2025
2026 Kind getKind() const { return AttrKind; }
2027
2028 bool isUnknown() const { return getKind() == Kind::Unknown; }
2029
2030 bool isNoCluster() const { return getKind() == Kind::NoCluster; }
2031
2032 bool isFixedDims() const { return getKind() == Kind::FixedDims; }
2033
2034 bool isVariableDims() const { return getKind() == Kind::VariableDims; }
2035
2037
2039
2041
2042 /// \returns the dims stored. Note that this function can only be called if
2043 /// the kind is \p FixedDims.
2044 const std::array<unsigned, 3> &getDims() const;
2045
2046 bool operator==(const ClusterDimsAttr &RHS) const {
2047 return AttrKind == RHS.AttrKind && Dims == RHS.Dims;
2048 }
2049
2050 std::string to_string() const;
2051
2052 static ClusterDimsAttr get(const Function &F);
2053
2054private:
2055 enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 };
2056
2057 ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {}
2058
2059 std::array<unsigned, 3> Dims = {0, 0, 0};
2060
2061 Kind AttrKind = Kind::Unknown;
2062};
2063
2064} // namespace AMDGPU
2065
2068
2069} // end namespace llvm
2070
2071#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Base class for AMDGPU specific classes of TargetSubtarget.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_READONLY
Definition Compiler.h:322
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define G(x, y, z)
Definition MD5.cpp:55
Register Reg
Register const TargetRegisterInfo * TRI
#define T
Value * RHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
static ClusterDimsAttr get(const Function &F)
bool operator==(const ClusterDimsAttr &RHS) const
const std::array< unsigned, 3 > & getDims() const
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
TargetIDSetting getXnackSetting() const
void print(raw_ostream &OS) const
Write string representation to OS.
AMDGPUTargetID(const MCSubtargetInfo &STI)
void setTargetIDFromTargetIDStream(StringRef TargetID)
void setSramEccSetting(TargetIDSetting NewSramEccSetting)
Sets sramecc setting to NewSramEccSetting.
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
ComponentInfo(const MCInstrDesc &OpDesc, ComponentKind Kind=ComponentKind::SINGLE, bool VOP3Layout=false)
ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps, bool VOP3Layout=false)
unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const
ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum, int BitOp3Idx)
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
bool hasRegSrcOperand(unsigned CompSrcIdx) const
unsigned getMandatoryLiteralCompOperandIndex() const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
bool hasInvalidOperand(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
const ComponentInfo & operator[](size_t ComponentIdx) const
InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
std::array< MCRegister, Component::MAX_OPR_NUM > RegIndices
Represents the counter values to wait for in an s_waitcnt instruction.
Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt, unsigned VaVdst, unsigned VmVsrc)
bool hasWaitExceptStoreCnt() const
Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
Waitcnt combined(const Waitcnt &Other) const
unsigned get(InstCounterType T) const
friend raw_ostream & operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait)
void set(InstCounterType T, unsigned Val)
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
MCRegisterClass - Base class of TargetRegisterClass.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Generic base class for all target subtargets.
Metadata node.
Definition Metadata.h:1080
Representation of each machine instruction.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned decodeFieldVaVcc(unsigned Encoded)
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version)
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt, const IsaVersion &Version)
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned getHoldCntBitMask(const IsaVersion &Version)
unsigned decodeFieldVaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
unsigned decodeFieldVaSsrc(unsigned Encoded)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
Generic target versions emitted by this version of LLVM.
static constexpr unsigned GFX12_5
static constexpr unsigned GFX9_4
static constexpr unsigned GFX10_1
static constexpr unsigned GFX10_3
static constexpr unsigned GFX11
static constexpr unsigned GFX9
static constexpr unsigned GFX12
EncodingField< 10, 6 > HwregOffset
EncodingField< 5, 0 > HwregId
EncodingFields< HwregId, HwregOffset, HwregSize > HwregEncoding
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getArchVGPRAllocGranule()
For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage, returns the allocation granule...
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
constexpr unsigned getMaxFlatWorkGroupSize()
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
raw_ostream & operator<<(raw_ostream &OS, const AMDGPUTargetID &TargetID)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI)
Returns true if the message does not use the m0 operand.
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
constexpr unsigned VOPD3_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
bool isInlineValue(MCRegister Reg)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI; otherwise return Reg.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool getWMMAIsXDL(unsigned Opc)
static std::optional< unsigned > convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16, bool HasSetregVGPRMSBFixup)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
int getMTBUFElements(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
iota_range< InstCounterType > inst_counter_types(InstCounterType MaxCounter)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
FPType getFPDstSelType(unsigned Opc)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
const MCRegisterClass * getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI)
LLVM_READNONE constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC)
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGFX13(const MCSubtargetInfo &STI)
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val)
Checks if Val is inside MD, a !range-like metadata.
LLVM_READONLY bool isInvalidSingleUseProducerInst(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_READONLY bool isInvalidSingleUseConsumerInst(unsigned Opc)
unsigned getVOPDOpcode(unsigned Opc, bool VOPD3)
bool isGroupSegment(const GlobalValue *GV)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for the given ISA Version.
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
bool isArgPassedInSGPR(const Argument *A)
LLVM_READNONE constexpr bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool isDPALU_DPP32BitOpc(unsigned Opc)
bool getVOP1IsSingle(unsigned Opc)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getHasDepthExport(const Function &F)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
bool getMUBUFHasVAddr(unsigned Opc)
bool isTrue16Inst(unsigned Opc)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isAsyncStore(unsigned Opc)
unsigned getDynamicVGPRBlockSize(const Function &F)
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
unsigned getKmcntBitMask(const IsaVersion &Version)
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, const MCRegisterInfo &MRI)
If Reg is a low VGPR return a corresponding high VGPR with MSBs set.
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
unsigned getBitOp2(unsigned Opc)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
unsigned getXcntBitMask(const IsaVersion &Version)
bool isGenericAtomic(unsigned Opc)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFTfe(unsigned Opc)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isGFX13Plus(const MCSubtargetInfo &STI)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX10Plus(const MCSubtargetInfo &STI)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isGlobalSegment(const GlobalValue *GV)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:204
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:219
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:205
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:231
std::optional< unsigned > getPKFMACF16InlineEncoding(uint32_t Literal, bool IsGFX11Plus)
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo *STI)
bool isNotGFX9Plus(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int32_t getSOPPWithRelaxation(uint32_t Opcode)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)
bool isGFX1250(const MCSubtargetInfo &STI)
bool supportsWave32(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool isTensorStore(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
bool getMUBUFIsBufferInv(unsigned Opc)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool hasDynamicVGPR(const Function &F)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
LLVM_READNONE constexpr bool isChainCC(CallingConv::ID CC)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool canGuaranteeTCO(CallingConv::ID CC)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, bool VOPD3)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable?
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
Definition MathExtras.h:207
@ Wait
Definition Threading.h:60
Op::Description Desc
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
@ Other
Any other memory.
Definition ModRef.h:68
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
AMD Kernel Code Object (amd_kernel_code_t).
constexpr EncodingField(ValueType Value)
static ValueType decode(uint64_t Encoded)
constexpr uint64_t encode() const
static constexpr uint64_t encode(Fields... Values)
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)
constexpr EncodingField(ValueType Value)
constexpr uint64_t encode() const
static ValueType decode(uint64_t Encoded)
Instruction set architecture version.