1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
33class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
40 enum class TrapHandlerAbi {
41 NONE = 0x00,
42 AMDHSA = 0x01,
43 };
44
45 enum class TrapID {
46 LLVMAMDHSATrap = 0x02,
47 LLVMAMDHSADebugTrap = 0x03,
48 };
49
50private:
51 /// SelectionDAGISel related APIs.
52 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
53
54 /// GlobalISel related APIs.
55 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
56 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
57 std::unique_ptr<InstructionSelector> InstSelector;
58 std::unique_ptr<LegalizerInfo> Legalizer;
59 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
60
61protected:
62 // Basic subtarget description.
63 AMDGPU::IsaInfo::AMDGPUTargetID TargetID;
64 unsigned Gen = INVALID;
65 InstrItineraryData InstrItins;
66 int LDSBankCount = 0;
67 unsigned MaxPrivateElementSize = 0;
68
69 // Dynamically set bits that enable features.
70 bool DynamicVGPR = false;
71 bool DynamicVGPRBlockSize32 = false;
72 bool ScalarizeGlobal = false;
73
74 /// The maximum number of instructions that may be placed within an S_CLAUSE,
75 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
76 /// indicates a lack of S_CLAUSE support.
77 unsigned MaxHardClauseLength = 0;
78
79#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
80 bool ATTRIBUTE = DEFAULT;
81#include "AMDGPUGenSubtargetInfo.inc"
82
83private:
84 SIInstrInfo InstrInfo;
85 SITargetLowering TLInfo;
86 SIFrameLowering FrameLowering;
87
88public:
89 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
90 const GCNTargetMachine &TM);
91 ~GCNSubtarget() override;
92
93 GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU,
94 StringRef FS);
95
96 /// Diagnose inconsistent subtarget features before attempting to codegen
97 /// function \p F.
98 void checkSubtargetFeatures(const Function &F) const;
99
100 const SIInstrInfo *getInstrInfo() const override { return &InstrInfo; }
101
102 const SIFrameLowering *getFrameLowering() const override {
103 return &FrameLowering;
104 }
105
106 const SITargetLowering *getTargetLowering() const override { return &TLInfo; }
107
108 const SIRegisterInfo *getRegisterInfo() const override {
109 return &InstrInfo.getRegisterInfo();
110 }
111
112 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
113
114 const CallLowering *getCallLowering() const override {
115 return CallLoweringInfo.get();
116 }
117
118 const InlineAsmLowering *getInlineAsmLowering() const override {
119 return InlineAsmLoweringInfo.get();
120 }
121
122 InstructionSelector *getInstructionSelector() const override {
123 return InstSelector.get();
124 }
125
126 const LegalizerInfo *getLegalizerInfo() const override {
127 return Legalizer.get();
128 }
129
130 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
131 return RegBankInfo.get();
132 }
133
134 const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const {
135 return TargetID;
136 }
137
139 return &InstrItins;
140 }
141
142 void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
143
144 Generation getGeneration() const { return (Generation)Gen; }
145
146 bool isGFX11Plus() const { return getGeneration() >= GFX11; }
147
148#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
149 bool GETTER() const override { return ATTRIBUTE; }
150#include "AMDGPUGenSubtargetInfo.inc"
151
152 unsigned getMaxWaveScratchSize() const {
153 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
154 if (getGeneration() >= GFX12) {
155 // 18-bit field in units of 64-dword.
156 return (64 * 4) * ((1 << 18) - 1);
157 }
158 if (getGeneration() == GFX11) {
159 // 15-bit field in units of 64-dword.
160 return (64 * 4) * ((1 << 15) - 1);
161 }
162 // 13-bit field in units of 256-dword.
163 return (256 * 4) * ((1 << 13) - 1);
164 }
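  // Worked numbers (editorial illustration of the encodings above, assuming a
  // GCNSubtarget reference `ST`): the per-wave scratch limits evaluate to
  //   GFX12+ : (64 * 4)  * ((1 << 18) - 1) = 256  * 262143 = 67,108,608 bytes
  //   GFX11  : (64 * 4)  * ((1 << 15) - 1) = 256  * 32767  =  8,388,352 bytes
  //   older  : (256 * 4) * ((1 << 13) - 1) = 1024 * 8191   =  8,387,584 bytes
  // e.g. unsigned WaveScratchLimit = ST.getMaxWaveScratchSize();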
165
166 /// Return the number of high bits known to be zero for a frame index.
167 unsigned getKnownHighZeroBitsForFrameIndex() const {
168 return countl_zero(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
169 }
170
171 int getLDSBankCount() const { return LDSBankCount; }
172
173 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
174 return (ForBufferRSrc || !hasFlatScratchEnabled()) ? MaxPrivateElementSize
175 : 16;
176 }
177
178 unsigned getConstantBusLimit(unsigned Opcode) const;
179
180 /// Returns if the result of this instruction with a 16-bit result returned in
181 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserving
182 /// the original value.
183 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
184
185 bool supportsWGP() const {
186 if (HasGFX1250Insts)
187 return false;
188 return getGeneration() >= GFX10;
189 }
190
191 bool hasHWFP64() const { return HasFP64; }
192
193 bool hasAddr64() const {
194 return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
195 }
196
197 bool hasFlat() const {
198 return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);
199 }
200
201 // Return true if the target only has the reverse operand versions of VALU
202 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
203 bool hasOnlyRevVALUShifts() const {
204 return getGeneration() >= VOLCANIC_ISLANDS;
205 }
206
207 bool hasFractBug() const { return getGeneration() == SOUTHERN_ISLANDS; }
208
209 bool hasMed3_16() const { return getGeneration() >= AMDGPUSubtarget::GFX9; }
210
211 bool hasMin3Max3_16() const {
212 return getGeneration() >= AMDGPUSubtarget::GFX9;
213 }
214
215 bool hasSwap() const { return HasGFX9Insts; }
216
217 bool hasScalarPackInsts() const { return HasGFX9Insts; }
218
219 bool hasScalarMulHiInsts() const { return HasGFX9Insts; }
220
221 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
222
223 TrapHandlerAbi getTrapHandlerAbi() const {
224 return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE;
225 }
226
227 bool supportsGetDoorbellID() const {
228 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
229 return getGeneration() >= GFX9;
230 }
231
232 /// True if the offset field of DS instructions works as expected. On SI, the
233 /// offset uses a 16-bit adder and does not always wrap properly.
234 bool hasUsableDSOffset() const { return getGeneration() >= SEA_ISLANDS; }
235
236 bool unsafeDSOffsetFoldingEnabled() const {
237 return EnableUnsafeDSOffsetFolding;
238 }
239
240 /// Condition output from div_scale is usable.
241 bool hasUsableDivScaleConditionOutput() const {
242 return getGeneration() != SOUTHERN_ISLANDS;
243 }
244
245 /// Extra wait hazard is needed in some cases before
246 /// s_cbranch_vccnz/s_cbranch_vccz.
247 bool hasReadVCCZBug() const { return getGeneration() <= SEA_ISLANDS; }
248
249 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
250 bool partialVCCWritesUpdateVCCZ() const { return getGeneration() >= GFX10; }
251
252 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
253 /// was written by a VALU instruction.
254 bool hasSMRDReadVALUDefHazard() const {
255 return getGeneration() == SOUTHERN_ISLANDS;
256 }
257
258 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
259 /// SGPR was written by a VALU Instruction.
260 bool hasVMEMReadSGPRVALUDefHazard() const {
261 return getGeneration() >= VOLCANIC_ISLANDS;
262 }
263
264 bool hasRFEHazards() const { return getGeneration() >= VOLCANIC_ISLANDS; }
265
266 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
267 unsigned getSetRegWaitStates() const {
268 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
269 }
270
271 /// Return the amount of LDS that can be used that will not restrict the
272 /// occupancy lower than WaveCount.
273 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
274 const Function &) const;
275
276 bool supportsMinMaxDenormModes() const {
277 return getGeneration() >= AMDGPUSubtarget::GFX9;
278 }
279
280 /// \returns If target supports S_DENORM_MODE.
281 bool hasDenormModeInst() const {
282 return getGeneration() >= AMDGPUSubtarget::GFX10;
283 }
284
285 /// \returns If target supports ds_read/write_b128 and user enables generation
286 /// of ds_read/write_b128.
287 bool useDS128() const { return HasCIInsts && EnableDS128; }
288
289 /// \return If target supports ds_read/write_b96/128.
290 bool hasDS96AndDS128() const { return HasCIInsts; }
291
292 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
293 bool haveRoundOpsF64() const { return HasCIInsts; }
294
295 /// \returns If MUBUF instructions always perform range checking, even for
296 /// buffer resources used for private memory access.
297 bool privateMemoryResourceIsRangeChecked() const {
298 return getGeneration() < AMDGPUSubtarget::GFX9;
299 }
300
301 /// \returns If target requires PRT Struct NULL support (zero result registers
302 /// for sparse texture support).
303 bool usePRTStrictNull() const { return EnablePRTStrictNull; }
304
305 bool hasUnalignedBufferAccessEnabled() const {
306 return HasUnalignedBufferAccess && HasUnalignedAccessMode;
307 }
308
309 bool hasUnalignedDSAccessEnabled() const {
310 return HasUnalignedDSAccess && HasUnalignedAccessMode;
311 }
312
313 bool hasUnalignedScratchAccessEnabled() const {
314 return HasUnalignedScratchAccess && HasUnalignedAccessMode;
315 }
316
317 bool isXNACKEnabled() const { return TargetID.isXnackOnOrAny(); }
318
319 bool isTgSplitEnabled() const { return EnableTgSplit; }
320
321 bool isCuModeEnabled() const { return EnableCuMode; }
322
323 bool isPreciseMemoryEnabled() const { return EnablePreciseMemory; }
324
325 bool hasFlatScrRegister() const { return hasFlatAddressSpace(); }
326
327 // Check if target supports ST addressing mode with FLAT scratch instructions.
328 // The ST addressing mode means no registers are used, either VGPR or SGPR,
329 // but only immediate offset is swizzled and added to the FLAT scratch base.
330 bool hasFlatScratchSTMode() const {
331 return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts());
332 }
333
334 bool hasFlatScratchSVSMode() const { return HasGFX940Insts || HasGFX11Insts; }
335
336 bool hasFlatScratchEnabled() const {
337 return hasArchitectedFlatScratch() ||
338 (EnableFlatScratch && hasFlatScratchInsts());
339 }
340
341 bool hasGlobalAddTidInsts() const { return HasGFX10_BEncoding; }
342
343 bool hasAtomicCSub() const { return HasGFX10_BEncoding; }
344
345 bool hasMTBUFInsts() const { return !hasGFX1250Insts(); }
346
347 bool hasFormattedMUBUFInsts() const { return !hasGFX1250Insts(); }
348
349 bool hasExportInsts() const {
350 return !hasGFX940Insts() && !hasGFX1250Insts();
351 }
352
353 bool hasVINTERPEncoding() const {
354 return HasGFX11Insts && !hasGFX1250Insts();
355 }
356
357 // DS_ADD_F64/DS_ADD_RTN_F64
358 bool hasLdsAtomicAddF64() const {
359 return hasGFX90AInsts() || hasGFX1250Insts();
360 }
361
362 bool hasMultiDwordFlatScratchAddressing() const {
363 return getGeneration() >= GFX9;
364 }
365
366 bool hasFlatLgkmVMemCountInOrder() const { return getGeneration() > GFX9; }
367
368 bool hasD16LoadStore() const { return getGeneration() >= GFX9; }
369
370 bool d16PreservesUnusedBits() const {
371 return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
372 }
373
374 bool hasD16Images() const { return getGeneration() >= VOLCANIC_ISLANDS; }
375
376 /// Return if most LDS instructions have an m0 use that requires m0 to be
377 /// initialized.
378 bool ldsRequiresM0Init() const { return getGeneration() < GFX9; }
379
380 // True if the hardware rewinds and replays GWS operations if a wave is
381 // preempted.
382 //
383 // If this is false, a GWS operation requires testing if a nack set the
384 // MEM_VIOL bit, and repeating if so.
385 bool hasGWSAutoReplay() const { return getGeneration() >= GFX9; }
386
387 /// \returns if target has ds_gws_sema_release_all instruction.
388 bool hasGWSSemaReleaseAll() const { return HasCIInsts; }
389
390 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
391
392 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
393
394 // Covers VS/PS/CS graphics shaders
395 bool isMesaGfxShader(const Function &F) const {
396 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
397 }
398
399 bool hasMad64_32() const { return getGeneration() >= SEA_ISLANDS; }
400
401 bool hasAtomicFaddInsts() const {
402 return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
403 }
404
406 return getGeneration() < SEA_ISLANDS;
407 }
408
409 bool hasInstPrefetch() const {
410 return getGeneration() == GFX10 || getGeneration() == GFX11;
411 }
412
413 bool hasPrefetch() const { return HasGFX12Insts; }
414
415 // Has s_cmpk_* instructions.
416 bool hasSCmpK() const { return getGeneration() < GFX12; }
417
418 // Scratch is allocated in 256 dword per wave blocks for the entire
419 // wavefront. When viewed from the perspective of an arbitrary workitem, this
420 // is 4-byte aligned.
421 //
422 // Only 4-byte alignment is really needed to access anything. Transformations
423 // on the pointer value itself may rely on the alignment / known low bits of
424 // the pointer. Set this to something above the minimum to avoid needing
425 // dynamic realignment in common cases.
426 Align getStackAlignment() const { return Align(16); }
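  // Usage sketch (hypothetical caller, assuming a GCNSubtarget `ST` and a
  // MachineFrameInfo `MFI`): frame lowering may raise the frame alignment to
  // this value before laying out stack objects.
  //   MFI.ensureMaxAlignment(ST.getStackAlignment());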
427
428 bool enableMachineScheduler() const override { return true; }
429
430 bool useAA() const override;
431
432 bool enableSubRegLiveness() const override { return true; }
433
434 void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
435 bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
436
437 // static wrappers
438 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
439
440 // XXX - Why is this here if it isn't in the default pass set?
441 bool enableEarlyIfConversion() const override { return true; }
442
443 void overrideSchedPolicy(MachineSchedPolicy &Policy,
444 const SchedRegion &Region) const override;
445
446 void overridePostRASchedPolicy(MachineSchedPolicy &Policy,
447 const SchedRegion &Region) const override;
448
449 void mirFileLoaded(MachineFunction &MF) const override;
450
451 unsigned getMaxNumUserSGPRs() const {
452 return AMDGPU::getMaxNumUserSGPRs(*this);
453 }
454
455 bool useVGPRIndexMode() const;
456
457 bool hasScalarCompareEq64() const {
458 return getGeneration() >= VOLCANIC_ISLANDS;
459 }
460
461 bool hasLDSFPAtomicAddF32() const { return HasGFX8Insts; }
462 bool hasLDSFPAtomicAddF64() const {
463 return HasGFX90AInsts || HasGFX1250Insts;
464 }
465
466 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
467 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
468
469 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
470 bool hasPermLane64() const { return getGeneration() >= GFX11; }
471
472 bool hasDPPBroadcasts() const { return HasDPP && getGeneration() < GFX10; }
473
474 bool hasDPPWavefrontShifts() const {
475 return HasDPP && getGeneration() < GFX10;
476 }
477
478 // Has V_PK_MOV_B32 opcode
479 bool hasPkMovB32() const { return HasGFX90AInsts; }
480
481 bool hasFmaakFmamkF32Insts() const {
482 return getGeneration() >= GFX10 || hasGFX940Insts();
483 }
484
485 bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
486
487 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
488
489 unsigned getNSAMaxSize(bool HasSampler = false) const {
490 return AMDGPU::getNSAMaxSize(*this, HasSampler);
491 }
492
493 bool hasMadF16() const;
494
495 bool hasMovB64() const { return HasGFX940Insts || HasGFX1250Insts; }
496
497 // Scalar and global loads support scale_offset bit.
498 bool hasScaleOffset() const { return HasGFX1250Insts; }
499
500 // FLAT GLOBAL VOffset is signed
501 bool hasSignedGVSOffset() const { return HasGFX1250Insts; }
502
504
505 bool hasUserSGPRInit16BugInWave32() const {
506 return HasUserSGPRInit16Bug && isWave32();
507 }
508
512
513 // \returns true if the subtarget supports DWORDX3 load/store instructions.
514 bool hasDwordx3LoadStores() const { return HasCIInsts; }
515
519
524
527 }
528
531 }
532
533 bool hasLDSMisalignedBugInWGPMode() const {
534 return HasLDSMisalignedBug && !EnableCuMode;
535 }
536
537 // The shift amount of a 64-bit shift cannot be the highest allocated register
538 // if it is also at the end of the allocation block.
539 bool hasShift64HighRegBug() const {
540 return HasGFX90AInsts && !HasGFX940Insts;
541 }
542
543 // Has one cycle hazard on transcendental instruction feeding a
544 // non transcendental VALU.
545 bool hasTransForwardingHazard() const { return HasGFX940Insts; }
546
547 // Has one cycle hazard on a VALU instruction partially writing dst with
548 // a shift of result bits feeding another VALU instruction.
549 bool hasDstSelForwardingHazard() const { return HasGFX940Insts; }
550
551 // Cannot use op_sel with v_dot instructions.
552 bool hasDOTOpSelHazard() const { return HasGFX940Insts || HasGFX11Insts; }
553
554 // Does not have HW interlocks for a VALU writing and then reading SGPRs.
555 bool hasVDecCoExecHazard() const { return HasGFX940Insts; }
556
557 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
558
559 bool hasFPAtomicToDenormModeHazard() const {
560 return getGeneration() == GFX10;
561 }
562
563 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
564
565 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
566
567 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
568
570 return getGeneration() == GFX11;
571 }
572
573 bool hasCvtScaleForwardingHazard() const { return HasGFX950Insts; }
574
575 bool requiresCodeObjectV6() const { return RequiresCOV6; }
576
577 bool useVGPRBlockOpsForCSR() const { return UseBlockVGPROpsForCSR; }
578
579 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
580
581 bool hasVALUReadSGPRHazard() const {
582 return HasGFX12Insts && !HasGFX1250Insts;
583 }
584
585 bool setRegModeNeedsVNOPs() const {
586 return HasGFX1250Insts && getGeneration() == GFX12;
587 }
588
589 /// Return if operations acting on VGPR tuples require even alignment.
590 bool needsAlignedVGPRs() const { return RequiresAlignVGPR; }
591
592 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
593 bool hasSPackHL() const { return HasGFX11Insts; }
594
595 /// Return true if the target's EXP instruction has the COMPR flag, which
596 /// affects the meaning of the EN (enable) bits.
597 bool hasCompressedExport() const { return !HasGFX11Insts; }
598
599 /// Return true if the target's EXP instruction supports the NULL export
600 /// target.
601 bool hasNullExportTarget() const { return !HasGFX11Insts; }
602
603 bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; }
604
605 /// Return true if the target has the S_DELAY_ALU instruction.
606 bool hasDelayAlu() const { return HasGFX11Insts; }
607
608 /// Returns true if the target supports
609 /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
610 /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
611 bool hasLDSLoadB96_B128() const { return hasGFX950Insts(); }
612
613 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
614 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
615 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
616
617 /// \returns true if inline constants are not supported for F16 pseudo
618 /// scalar transcendentals.
619 bool hasNoF16PseudoScalarTransInlineConstants() const {
620 return getGeneration() == GFX12;
621 }
622
623 /// \returns true if the target has packed f32 instructions that only read 32
624 /// bits from a scalar operand (SGPR or literal) and replicates the bits to
625 /// both channels.
627 return getGeneration() == GFX12 && HasGFX1250Insts;
628 }
629
630 bool hasAddPC64Inst() const { return HasGFX1250Insts; }
631
632 /// \returns true if the target supports expert scheduling mode 2 which relies
633 /// on the compiler to insert waits to avoid hazards between VMEM and VALU
634 /// instructions in some instances.
635 bool hasExpertSchedulingMode() const { return getGeneration() >= GFX12; }
636
637 /// \returns The maximum number of instructions that can be enclosed in an
638 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
639 /// instruction.
640 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
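  // Usage sketch (hypothetical clause-forming pass, assuming a GCNSubtarget
  // `ST` and a run of `NumCandidates` clauseable memory instructions):
  //   if (ST.hasHardClauses())
  //     NumCandidates = std::min(NumCandidates, ST.maxHardClauseLength());
  // The S_CLAUSE simm16 operand would then encode NumCandidates - 1, matching
  // the "one greater than the maximum argument" relationship described above.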
641
642 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
643 /// SGPRs
644 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
645
646 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
647 /// VGPRs
648 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,
649 unsigned DynamicVGPRBlockSize) const;
650
651 /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
652 /// be achieved when the only function running on a CU is \p F, each workgroup
653 /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
654 /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a
655 /// range, so this returns a range as well.
656 ///
657 /// Note that occupancy can be affected by the scratch allocation as well, but
658 /// we do not have enough information to compute it.
659 std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
660 unsigned LDSSize = 0,
661 unsigned NumSGPRs = 0,
662 unsigned NumVGPRs = 0) const;
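  // Usage sketch (hypothetical caller, assuming a GCNSubtarget `ST` and a
  // Function `F`): query the achievable waves-per-EU range for a kernel using
  // 16 KiB of LDS, 64 SGPRs and 96 VGPRs per wave.
  //   auto [MinOcc, MaxOcc] =
  //       ST.computeOccupancy(F, /*LDSSize=*/16384, /*NumSGPRs=*/64,
  //                           /*NumVGPRs=*/96);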
663
664 /// \returns true if the flat_scratch register should be initialized with the
665 /// pointer to the wave's scratch memory rather than a size and offset.
666 bool flatScratchIsPointer() const {
667 return getGeneration() >= AMDGPUSubtarget::GFX9;
668 }
669
670 /// \returns true if the machine has merged shaders in which s0-s7 are
671 /// reserved by the hardware and user SGPRs start at s8
672 bool hasMergedShaders() const { return getGeneration() >= GFX9; }
673
674 // \returns true if the target supports the pre-NGG legacy geometry path.
675 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
676
677 // \returns true if the target has split barriers feature
678 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
679
680 // \returns true if the target has DX10_CLAMP kernel descriptor mode bit
681 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
682
683 // \returns true if the target has IEEE kernel descriptor mode bit
684 bool hasIEEEMode() const { return getGeneration() < GFX12; }
685
686 // \returns true if the target has WG_RR_MODE kernel descriptor mode bit
687 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
688
689 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
690 /// values.
691 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
692
693 bool hasINVWBL2WaitCntRequirement() const { return HasGFX1250Insts; }
694
695 bool hasVOPD3() const { return HasGFX1250Insts; }
696
697 // \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
698 bool hasVectorMulU64() const { return HasGFX1250Insts; }
699
700 // \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32
701 // instructions.
702 bool hasMadU64U32NoCarry() const { return HasGFX1250Insts; }
703
704 // \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
705 bool hasIntMinMax64() const { return HasGFX1250Insts; }
706
707 // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
708 bool hasPkMinMax3Insts() const { return HasGFX1250Insts; }
709
710 // \returns true if the target has the S_GET_SHADER_CYCLES_U64 instruction.
711 bool hasSGetShaderCyclesInst() const { return HasGFX1250Insts; }
712
713 // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
714 // of sign-extending. Note that GFX1250 has not only fixed the bug but also
715 // extended VA to 57 bits.
717 return HasGFX12Insts && !HasGFX1250Insts;
718 }
719
720 // \returns true if the target needs to create a prolog for backward
721 // compatibility when preloading kernel arguments.
723 return hasKernargPreload() && !HasGFX1250Insts;
724 }
725
726 bool hasCondSubInsts() const { return HasGFX12Insts; }
727
728 bool hasSubClampInsts() const { return hasGFX10_3Insts(); }
729
730 /// \returns SGPR allocation granularity supported by the subtarget.
731 unsigned getSGPRAllocGranule() const {
732 return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
733 }
734
735 /// \returns SGPR encoding granularity supported by the subtarget.
736 unsigned getSGPREncodingGranule() const {
737 return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
738 }
739
740 /// \returns Total number of SGPRs supported by the subtarget.
741 unsigned getTotalNumSGPRs() const {
742 return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
743 }
744
745 /// \returns Addressable number of SGPRs supported by the subtarget.
746 unsigned getAddressableNumSGPRs() const {
747 return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
748 }
749
750 /// \returns Minimum number of SGPRs that meets the given number of waves per
751 /// execution unit requirement supported by the subtarget.
752 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
753 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
754 }
755
756 /// \returns Maximum number of SGPRs that meets the given number of waves per
757 /// execution unit requirement supported by the subtarget.
758 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
759 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
760 }
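  // Usage sketch (hypothetical pressure heuristic, assuming a GCNSubtarget
  // `ST`): the addressable SGPR budget that still permits 4 waves per EU.
  //   unsigned SGPRBudget =
  //       ST.getMaxNumSGPRs(/*WavesPerEU=*/4, /*Addressable=*/true);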
761
762 /// \returns Reserved number of SGPRs. This is common
763 /// utility function called by MachineFunction and
764 /// Function variants of getReservedNumSGPRs.
765 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
766 /// \returns Reserved number of SGPRs for given machine function \p MF.
767 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
768
769 /// \returns Reserved number of SGPRs for given function \p F.
770 unsigned getReservedNumSGPRs(const Function &F) const;
771
772 /// \returns Maximum number of preloaded SGPRs for the subtarget.
773 unsigned getMaxNumPreloadedSGPRs() const;
774
775 /// \returns max num SGPRs. This is the common utility
776 /// function called by MachineFunction and Function
777 /// variants of getMaxNumSGPRs.
778 unsigned getBaseMaxNumSGPRs(const Function &F,
779 std::pair<unsigned, unsigned> WavesPerEU,
780 unsigned PreloadedSGPRs,
781 unsigned ReservedNumSGPRs) const;
782
783 /// \returns Maximum number of SGPRs that meets number of waves per execution
784 /// unit requirement for function \p MF, or number of SGPRs explicitly
785 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
786 ///
787 /// \returns Value that meets number of waves per execution unit requirement
788 /// if explicitly requested value cannot be converted to integer, violates
789 /// subtarget's specifications, or does not meet number of waves per execution
790 /// unit requirement.
791 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
792
793 /// \returns Maximum number of SGPRs that meets number of waves per execution
794 /// unit requirement for function \p F, or number of SGPRs explicitly
795 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
796 ///
797 /// \returns Value that meets number of waves per execution unit requirement
798 /// if explicitly requested value cannot be converted to integer, violates
799 /// subtarget's specifications, or does not meet number of waves per execution
800 /// unit requirement.
801 unsigned getMaxNumSGPRs(const Function &F) const;
802
803 /// \returns VGPR allocation granularity supported by the subtarget.
804 unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {
805 return AMDGPU::IsaInfo::getVGPRAllocGranule(this, DynamicVGPRBlockSize);
806 }
807
808 /// \returns VGPR encoding granularity supported by the subtarget.
809 unsigned getVGPREncodingGranule() const {
810 return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
811 }
812
813 /// \returns Total number of VGPRs supported by the subtarget.
814 unsigned getTotalNumVGPRs() const {
815 return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
816 }
817
818 /// \returns Addressable number of architectural VGPRs supported by the
819 /// subtarget.
820 unsigned getAddressableNumArchVGPRs() const {
821 return AMDGPU::IsaInfo::getAddressableNumArchVGPRs(this);
822 }
823
824 /// \returns Addressable number of VGPRs supported by the subtarget.
825 unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
826 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
827 }
828
829 /// \returns the minimum number of VGPRs that will prevent achieving more than
830 /// the specified number of waves \p WavesPerEU.
831 unsigned getMinNumVGPRs(unsigned WavesPerEU,
832 unsigned DynamicVGPRBlockSize) const {
833 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU,
834 DynamicVGPRBlockSize);
835 }
836
837 /// \returns the maximum number of VGPRs that can be used and still achieved
838 /// at least the specified number of waves \p WavesPerEU.
839 unsigned getMaxNumVGPRs(unsigned WavesPerEU,
840 unsigned DynamicVGPRBlockSize) const {
841 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU,
842 DynamicVGPRBlockSize);
843 }
844
845 /// \returns max num VGPRs. This is the common utility function
846 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
847 unsigned
848 getBaseMaxNumVGPRs(const Function &F,
849 std::pair<unsigned, unsigned> NumVGPRBounds) const;
850
851 /// \returns Maximum number of VGPRs that meets number of waves per execution
852 /// unit requirement for function \p F, or number of VGPRs explicitly
853 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
854 ///
855 /// \returns Value that meets number of waves per execution unit requirement
856 /// if explicitly requested value cannot be converted to integer, violates
857 /// subtarget's specifications, or does not meet number of waves per execution
858 /// unit requirement.
859 unsigned getMaxNumVGPRs(const Function &F) const;
860
861 unsigned getMaxNumAGPRs(const Function &F) const { return getMaxNumVGPRs(F); }
862
863 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
864 /// of waves per execution unit required for the function \p MF.
865 std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
866
867 /// \returns Maximum number of VGPRs that meets number of waves per execution
868 /// unit requirement for function \p MF, or number of VGPRs explicitly
869 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
870 ///
871 /// \returns Value that meets number of waves per execution unit requirement
872 /// if explicitly requested value cannot be converted to integer, violates
873 /// subtarget's specifications, or does not meet number of waves per execution
874 /// unit requirement.
875 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
876
877 bool supportsWave32() const { return getGeneration() >= GFX10; }
878
879 bool supportsWave64() const { return !hasGFX1250Insts(); }
880
881 bool isWave32() const { return getWavefrontSize() == 32; }
882
883 bool isWave64() const { return getWavefrontSize() == 64; }
884
885 /// Returns whether the wavesize of this subtarget is known to be reliable. This
886 /// is false only for a default target-cpu that does not have an explicit
887 /// +wavefrontsize target feature.
888 bool isWaveSizeKnown() const {
889 return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
890 hasFeature(AMDGPU::FeatureWavefrontSize64);
891 }
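  // Usage sketch (hypothetical transform, assuming a GCNSubtarget `ST`):
  // folds that depend on the wave size should be skipped when compiling for a
  // generic target where neither wavefront-size feature is set.
  //   if (!ST.isWaveSizeKnown())
  //     return false;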
892
893 const TargetRegisterClass *getBoolRC() const {
894 return getRegisterInfo()->getBoolRC();
895 }
896
897 /// \returns Maximum number of work groups per compute unit supported by the
898 /// subtarget and limited by given \p FlatWorkGroupSize.
899 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
900 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
901 }
902
903 /// \returns Minimum flat work group size supported by the subtarget.
904 unsigned getMinFlatWorkGroupSize() const override {
905 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
906 }
907
908 /// \returns Maximum flat work group size supported by the subtarget.
909 unsigned getMaxFlatWorkGroupSize() const override {
910 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
911 }
912
913 /// \returns Number of waves per execution unit required to support the given
914 /// \p FlatWorkGroupSize.
915 unsigned
916 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
917 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
918 }
919
920 /// \returns Minimum number of waves per execution unit supported by the
921 /// subtarget.
922 unsigned getMinWavesPerEU() const override {
923 return AMDGPU::IsaInfo::getMinWavesPerEU(this);
924 }
925
926 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
927 SDep &Dep,
928 const TargetSchedModel *SchedModel) const override;
929
930 // \returns true if it's beneficial on this subtarget for the scheduler to
931 // cluster stores as well as loads.
932 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
933
934 // \returns the number of address arguments from which to enable MIMG NSA
935 // on supported architectures.
936 unsigned getNSAThreshold(const MachineFunction &MF) const;
937
938 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
939 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
940 bool requiresNopBeforeDeallocVGPRs() const { return !HasGFX1250Insts; }
941
942 // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
943 // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
944 bool requiresWaitIdleBeforeGetReg() const { return HasGFX1250Insts; }
945
946 bool isDynamicVGPREnabled() const { return DynamicVGPR; }
947 unsigned getDynamicVGPRBlockSize() const {
948 return DynamicVGPRBlockSize32 ? 32 : 16;
949 }
950
951 bool requiresDisjointEarlyClobberAndUndef() const override {
952 // AMDGPU doesn't care if early-clobber and undef operands are allocated
953 // to the same register.
954 return false;
955 }
956
957 // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 must not be claused with anything and
958 // must be surrounded by S_WAIT_ALU(0xFFE3).
959 bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const {
960 return getGeneration() == GFX12;
961 }
962
963 // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
964 // read.
965 bool hasScratchBaseForwardingHazard() const {
966 return HasGFX1250Insts && getGeneration() == GFX12;
967 }
968
969 // src_flat_scratch_hi cannot be used as a source in SALU producing a 64-bit
970 // result.
971 bool hasFlatScratchHiInB64InstHazard() const {
972 return HasGFX1250Insts && getGeneration() == GFX12;
973 }
974
975 /// \returns true if the subtarget requires a wait for xcnt before VMEM
976 /// accesses that must never be repeated in the event of a page fault/re-try.
977 /// Atomic stores/rmw and all volatile accesses fall under this criteria.
978 bool requiresWaitXCntForSingleAccessInstructions() const {
979 return HasGFX1250Insts;
980 }
981
982 /// \returns the number of significant bits in the immediate field of the
983 /// S_NOP instruction.
984 unsigned getSNopBits() const {
986 return 7;
988 return 4;
989 return 3;
990 }
991
995
1001
1002 /// Return true if real (non-fake) variants of True16 instructions using
1003 /// 16-bit registers should be code-generated. Fake True16 instructions are
1004 /// identical to non-fake ones except that they take 32-bit registers as
1005 /// operands and always use their low halves.
1006 // TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully
1007 // supported and the support for fake True16 instructions is removed.
1008 bool useRealTrue16Insts() const {
1009 return hasTrue16BitInsts() && EnableRealTrue16Insts;
1010 }
1011
1013 return getGeneration() >= GFX10 || isTgSplitEnabled();
1014 }
1015};
1016
1017class GCNUserSGPRUsageInfo {
1018public:
1019 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1020
1021 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1022
1023 bool hasDispatchPtr() const { return DispatchPtr; }
1024
1025 bool hasQueuePtr() const { return QueuePtr; }
1026
1027 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1028
1029 bool hasDispatchID() const { return DispatchID; }
1030
1031 bool hasFlatScratchInit() const { return FlatScratchInit; }
1032
1033 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1034
1035 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1036
1037 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1038
1039 unsigned getNumFreeUserSGPRs();
1040
1041 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1042
1043 enum UserSGPRID : unsigned {
1044 ImplicitBufferPtrID = 0,
1045 PrivateSegmentBufferID = 1,
1046 DispatchPtrID = 2,
1047 QueuePtrID = 3,
1048 KernargSegmentPtrID = 4,
1049 DispatchIdID = 5,
1050 FlatScratchInitID = 6,
1051 PrivateSegmentSizeID = 7
1052 };
1053
1054 // Returns the size in number of SGPRs for preload user SGPR field.
1055 static unsigned getNumUserSGPRForField(UserSGPRID ID) {
1056 switch (ID) {
1057 case ImplicitBufferPtrID:
1058 return 2;
1059 case PrivateSegmentBufferID:
1060 return 4;
1061 case DispatchPtrID:
1062 return 2;
1063 case QueuePtrID:
1064 return 2;
1065 case KernargSegmentPtrID:
1066 return 2;
1067 case DispatchIdID:
1068 return 2;
1069 case FlatScratchInitID:
1070 return 2;
1071 case PrivateSegmentSizeID:
1072 return 1;
1073 }
1074 llvm_unreachable("Unknown UserSGPRID.");
1075 }
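  // Worked example (editorial, derived from the field sizes above): a kernel
  // that needs only the dispatch pointer and the kernarg segment pointer uses
  //   getNumUserSGPRForField(DispatchPtrID) +
  //       getNumUserSGPRForField(KernargSegmentPtrID) == 2 + 2 == 4
  // user SGPRs before any kernarg preloading is accounted for.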
1076
1077 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1078
1079private:
1080 const GCNSubtarget &ST;
1081
1082 // Private memory buffer
1083 // Compute directly in sgpr[0:1]
1084 // Other shaders indirect 64-bits at sgpr[0:1]
1085 bool ImplicitBufferPtr = false;
1086
1087 bool PrivateSegmentBuffer = false;
1088
1089 bool DispatchPtr = false;
1090
1091 bool QueuePtr = false;
1092
1093 bool KernargSegmentPtr = false;
1094
1095 bool DispatchID = false;
1096
1097 bool FlatScratchInit = false;
1098
1099 bool PrivateSegmentSize = false;
1100
1101 unsigned NumKernargPreloadSGPRs = 0;
1102
1103 unsigned NumUsedUserSGPRs = 0;
1104};
1105
1106} // end namespace llvm
1107
1108#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H