LLVM 22.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
40 enum class TrapHandlerAbi {
41 NONE = 0x00,
42 AMDHSA = 0x01,
43 };
44
45 enum class TrapID {
48 };
49
50private:
51 /// SelectionDAGISel related APIs.
52 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
53
54 /// GlobalISel related APIs.
55 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
56 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
57 std::unique_ptr<InstructionSelector> InstSelector;
58 std::unique_ptr<LegalizerInfo> Legalizer;
59 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
60
61protected:
62 // Basic subtarget description.
64 unsigned Gen = INVALID;
66 int LDSBankCount = 0;
68
69 // Possibly statically set by tablegen, but may want to be overridden.
70 bool FastDenormalF32 = false;
71 bool HalfRate64Ops = false;
72 bool FullRate64Ops = false;
73
74 // Dynamically set bits that enable features.
75 bool FlatForGlobal = false;
77 bool BackOffBarrier = false;
79 bool UnalignedAccessMode = false;
81 bool HasApertureRegs = false;
82 bool SupportsXNACK = false;
83 bool KernargPreload = false;
84
85 // This should not be used directly. 'TargetID' tracks the dynamic settings
86 // for XNACK.
87 bool EnableXNACK = false;
88
89 bool EnableTgSplit = false;
90 bool EnableCuMode = false;
91 bool TrapHandler = false;
92 bool EnablePreciseMemory = false;
93
94 // Used as options.
95 bool EnableLoadStoreOpt = false;
97 bool EnableSIScheduler = false;
98 bool EnableDS128 = false;
99 bool EnablePRTStrictNull = false;
100 bool DumpCode = false;
102
103 // Subtarget properties statically set by tablegen
104 bool FP64 = false;
105 bool FMA = false;
106 bool MIMG_R128 = false;
107 bool CIInsts = false;
108 bool GFX8Insts = false;
109 bool GFX9Insts = false;
110 bool GFX90AInsts = false;
111 bool GFX940Insts = false;
112 bool GFX950Insts = false;
113 bool GFX10Insts = false;
114 bool GFX11Insts = false;
115 bool GFX12Insts = false;
116 bool GFX1250Insts = false;
117 bool GFX10_3Insts = false;
118 bool GFX7GFX8GFX9Insts = false;
119 bool SGPRInitBug = false;
120 bool UserSGPRInit16Bug = false;
123 bool HasSMemRealTime = false;
124 bool HasIntClamp = false;
125 bool HasFmaMixInsts = false;
126 bool HasFmaMixBF16Insts = false;
127 bool HasMovrel = false;
128 bool HasVGPRIndexMode = false;
130 bool HasScalarStores = false;
131 bool HasScalarAtomics = false;
132 bool HasSDWAOmod = false;
133 bool HasSDWAScalar = false;
134 bool HasSDWASdst = false;
135 bool HasSDWAMac = false;
136 bool HasSDWAOutModsVOPC = false;
137 bool HasDPP = false;
138 bool HasDPP8 = false;
139 bool HasDPALU_DPP = false;
140 bool HasDPPSrc1SGPR = false;
141 bool HasPackedFP32Ops = false;
142 bool HasImageInsts = false;
144 bool HasR128A16 = false;
145 bool HasA16 = false;
146 bool HasG16 = false;
147 bool HasNSAEncoding = false;
149 bool GFX10_AEncoding = false;
150 bool GFX10_BEncoding = false;
151 bool HasDLInsts = false;
152 bool HasFmacF64Inst = false;
153 bool HasDot1Insts = false;
154 bool HasDot2Insts = false;
155 bool HasDot3Insts = false;
156 bool HasDot4Insts = false;
157 bool HasDot5Insts = false;
158 bool HasDot6Insts = false;
159 bool HasDot7Insts = false;
160 bool HasDot8Insts = false;
161 bool HasDot9Insts = false;
162 bool HasDot10Insts = false;
163 bool HasDot11Insts = false;
164 bool HasDot12Insts = false;
165 bool HasDot13Insts = false;
166 bool HasMAIInsts = false;
167 bool HasFP8Insts = false;
169 bool HasCubeInsts = false;
170 bool HasLerpInst = false;
171 bool HasSadInsts = false;
172 bool HasQsadInsts = false;
173 bool HasCvtNormInsts = false;
176 bool HasFP8E5M3Insts = false;
177 bool HasCvtFP8Vop1Bug = false;
178 bool HasPkFmacF16Inst = false;
199 bool HasXF32Insts = false;
200 /// The maximum number of instructions that may be placed within an S_CLAUSE,
201 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
202 /// indicates a lack of S_CLAUSE support.
204 bool SupportsSRAMECC = false;
205 bool DynamicVGPR = false;
207 bool HasVMemToLDSLoad = false;
208 bool RequiresAlignVGPR = false;
209
210 // This should not be used directly. 'TargetID' tracks the dynamic settings
211 // for SRAMECC.
212 bool EnableSRAMECC = false;
213
214 bool HasNoSdstCMPX = false;
215 bool HasVscnt = false;
216 bool HasWaitXcnt = false;
217 bool HasGetWaveIdInst = false;
218 bool HasSMemTimeInst = false;
221 bool HasVOP3Literal = false;
222 bool HasNoDataDepHazard = false;
223 bool FlatAddressSpace = false;
224 bool FlatInstOffsets = false;
225 bool FlatGlobalInsts = false;
226 bool FlatScratchInsts = false;
227 bool FlatGVSMode = false;
230 bool EnableFlatScratch = false;
232 bool HasGDS = false;
233 bool HasGWS = false;
234 bool AddNoCarryInsts = false;
235 bool HasUnpackedD16VMem = false;
236 bool LDSMisalignedBug = false;
239 bool UnalignedDSAccess = false;
240 bool HasPackedTID = false;
241 bool ScalarizeGlobal = false;
242 bool HasSALUFloatInsts = false;
245 bool Has64BitLiterals = false;
247 bool HasBitOp3Insts = false;
248 bool HasTanhInsts = false;
251 bool HasPrngInst = false;
253 bool HasPermlane16Swap = false;
254 bool HasPermlane32Swap = false;
259 bool HasVmemPrefInsts = false;
261 bool HasSafeCUPrefetch = false;
264 bool HasNSAtoVMEMBug = false;
265 bool HasNSAClauseBug = false;
266 bool HasOffset3fBug = false;
272 bool Has1_5xVGPRs = false;
273 bool HasMADIntraFwdBug = false;
274 bool HasVOPDInsts = false;
278 bool HasAshrPkInsts = false;
282 bool HasMin3Max3PKF16 = false;
284 bool HasLshlAddU64Inst = false;
285 bool HasAddSubU64Insts = false;
286 bool HasMadU32Inst = false;
287 bool HasAddMinMaxInsts = false;
292 bool HasSWakeupBarrier = false;
293
294 bool RequiresCOV6 = false;
297
299
300 bool HasClusters = false;
302
303 // Dummy feature to use for assembler in tablegen.
304 bool FeatureDisable = false;
305
306private:
307 SIInstrInfo InstrInfo;
308 SITargetLowering TLInfo;
309 SIFrameLowering FrameLowering;
310
311public:
312 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
313 const GCNTargetMachine &TM);
314 ~GCNSubtarget() override;
315
317 StringRef GPU, StringRef FS);
318
319 /// Diagnose inconsistent subtarget features before attempting to codegen
320 /// function \p F.
321 void checkSubtargetFeatures(const Function &F) const;
322
323 const SIInstrInfo *getInstrInfo() const override {
324 return &InstrInfo;
325 }
326
327 const SIFrameLowering *getFrameLowering() const override {
328 return &FrameLowering;
329 }
330
331 const SITargetLowering *getTargetLowering() const override {
332 return &TLInfo;
333 }
334
335 const SIRegisterInfo *getRegisterInfo() const override {
336 return &InstrInfo.getRegisterInfo();
337 }
338
339 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
340
341 const CallLowering *getCallLowering() const override {
342 return CallLoweringInfo.get();
343 }
344
345 const InlineAsmLowering *getInlineAsmLowering() const override {
346 return InlineAsmLoweringInfo.get();
347 }
348
350 return InstSelector.get();
351 }
352
353 const LegalizerInfo *getLegalizerInfo() const override {
354 return Legalizer.get();
355 }
356
357 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
358 return RegBankInfo.get();
359 }
360
362 return TargetID;
363 }
364
366 return &InstrItins;
367 }
368
370
372 return (Generation)Gen;
373 }
374
375 unsigned getMaxWaveScratchSize() const {
376 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
377 if (getGeneration() >= GFX12) {
378 // 18-bit field in units of 64-dword.
379 return (64 * 4) * ((1 << 18) - 1);
380 }
381 if (getGeneration() == GFX11) {
382 // 15-bit field in units of 64-dword.
383 return (64 * 4) * ((1 << 15) - 1);
384 }
385 // 13-bit field in units of 256-dword.
386 return (256 * 4) * ((1 << 13) - 1);
387 }
388
389 /// Return the number of high bits known to be zero for a frame index.
393
394 int getLDSBankCount() const {
395 return LDSBankCount;
396 }
397
398 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
399 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
400 }
401
402 unsigned getConstantBusLimit(unsigned Opcode) const;
403
404 /// Returns if the result of this instruction with a 16-bit result returned in
405 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
406 /// the original value.
407 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
408
409 bool supportsWGP() const {
410 if (GFX1250Insts)
411 return false;
412 return getGeneration() >= GFX10;
413 }
414
415 bool hasIntClamp() const {
416 return HasIntClamp;
417 }
418
419 bool hasFP64() const {
420 return FP64;
421 }
422
423 bool hasMIMG_R128() const {
424 return MIMG_R128;
425 }
426
427 bool hasHWFP64() const {
428 return FP64;
429 }
430
431 bool hasHalfRate64Ops() const {
432 return HalfRate64Ops;
433 }
434
435 bool hasFullRate64Ops() const {
436 return FullRate64Ops;
437 }
438
439 bool hasAddr64() const {
441 }
442
443 bool hasFlat() const {
445 }
446
447 // Return true if the target only has the reverse operand versions of VALU
448 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
449 bool hasOnlyRevVALUShifts() const {
451 }
452
453 bool hasFractBug() const {
455 }
456
457 bool hasMed3_16() const {
459 }
460
461 bool hasMin3Max3_16() const {
463 }
464
465 bool hasFmaMixInsts() const {
466 return HasFmaMixInsts;
467 }
468
469 bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; }
470
471 bool hasFMA() const {
472 return FMA;
473 }
474
475 bool hasSwap() const {
476 return GFX9Insts;
477 }
478
479 bool hasScalarPackInsts() const {
480 return GFX9Insts;
481 }
482
483 bool hasScalarMulHiInsts() const {
484 return GFX9Insts;
485 }
486
487 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
488
492
494 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
495 return getGeneration() >= GFX9;
496 }
497
498 /// True if the offset field of DS instructions works as expected. On SI, the
499 /// offset uses a 16-bit adder and does not always wrap properly.
500 bool hasUsableDSOffset() const {
501 return getGeneration() >= SEA_ISLANDS;
502 }
503
507
508 /// Condition output from div_scale is usable.
512
513 /// Extra wait hazard is needed in some cases before
514 /// s_cbranch_vccnz/s_cbranch_vccz.
515 bool hasReadVCCZBug() const {
516 return getGeneration() <= SEA_ISLANDS;
517 }
518
519 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
521 return getGeneration() >= GFX10;
522 }
523
524 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
525 /// was written by a VALU instruction.
528 }
529
530 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
531 /// SGPR was written by a VALU Instruction.
534 }
535
536 bool hasRFEHazards() const {
538 }
539
540 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
541 unsigned getSetRegWaitStates() const {
542 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
543 }
544
545 bool dumpCode() const {
546 return DumpCode;
547 }
548
549 /// Return the amount of LDS that can be used that will not restrict the
550 /// occupancy lower than WaveCount.
551 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
552 const Function &) const;
553
556 }
557
558 /// \returns If target supports S_DENORM_MODE.
559 bool hasDenormModeInst() const {
561 }
562
563 bool useFlatForGlobal() const {
564 return FlatForGlobal;
565 }
566
567 /// \returns If target supports ds_read/write_b128 and user enables generation
568 /// of ds_read/write_b128.
569 bool useDS128() const {
570 return CIInsts && EnableDS128;
571 }
572
573 /// \return If target supports ds_read/write_b96/128.
574 bool hasDS96AndDS128() const {
575 return CIInsts;
576 }
577
578 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
579 bool haveRoundOpsF64() const {
580 return CIInsts;
581 }
582
583 /// \returns If MUBUF instructions always perform range checking, even for
584 /// buffer resources used for private memory access.
588
589 /// \returns If target requires PRT Strict NULL support (zero result registers
590 /// for sparse texture support).
591 bool usePRTStrictNull() const {
592 return EnablePRTStrictNull;
593 }
594
598
599 /// \returns true if the target supports backing off of s_barrier instructions
600 /// when an exception is raised.
602 return BackOffBarrier;
603 }
604
607 }
608
612
613 bool hasUnalignedDSAccess() const {
614 return UnalignedDSAccess;
615 }
616
620
623 }
624
628
630 return UnalignedAccessMode;
631 }
632
634
635 bool hasApertureRegs() const {
636 return HasApertureRegs;
637 }
638
639 bool isTrapHandlerEnabled() const {
640 return TrapHandler;
641 }
642
643 bool isXNACKEnabled() const {
644 return TargetID.isXnackOnOrAny();
645 }
646
647 bool isTgSplitEnabled() const {
648 return EnableTgSplit;
649 }
650
651 bool isCuModeEnabled() const {
652 return EnableCuMode;
653 }
654
656
657 bool hasFlatAddressSpace() const {
658 return FlatAddressSpace;
659 }
660
661 bool hasFlatScrRegister() const {
662 return hasFlatAddressSpace();
663 }
664
665 bool hasFlatInstOffsets() const {
666 return FlatInstOffsets;
667 }
668
669 bool hasFlatGlobalInsts() const {
670 return FlatGlobalInsts;
671 }
672
673 bool hasFlatScratchInsts() const {
674 return FlatScratchInsts;
675 }
676
677 // Check if target supports ST addressing mode with FLAT scratch instructions.
678 // The ST addressing mode means no registers are used, either VGPR or SGPR,
679 // but only immediate offset is swizzled and added to the FLAT scratch base.
680 bool hasFlatScratchSTMode() const {
682 }
683
684 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
685
688 }
689
690 bool enableFlatScratch() const {
691 return flatScratchIsArchitected() ||
693 }
694
695 bool hasGlobalAddTidInsts() const {
696 return GFX10_BEncoding;
697 }
698
699 bool hasAtomicCSub() const {
700 return GFX10_BEncoding;
701 }
702
703 bool hasMTBUFInsts() const { return !hasGFX1250Insts(); }
704
705 bool hasFormattedMUBUFInsts() const { return !hasGFX1250Insts(); }
706
707 bool hasExportInsts() const {
708 return !hasGFX940Insts() && !hasGFX1250Insts();
709 }
710
711 bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
712
713 // DS_ADD_F64/DS_ADD_RTN_F64
714 bool hasLdsAtomicAddF64() const {
715 return hasGFX90AInsts() || hasGFX1250Insts();
716 }
717
719 return getGeneration() >= GFX9;
720 }
721
724 }
725
727 return getGeneration() > GFX9;
728 }
729
730 bool hasD16LoadStore() const {
731 return getGeneration() >= GFX9;
732 }
733
735 return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
736 }
737
738 bool hasD16Images() const {
740 }
741
742 /// Return if most LDS instructions have an m0 use that require m0 to be
743 /// initialized.
744 bool ldsRequiresM0Init() const {
745 return getGeneration() < GFX9;
746 }
747
748 // True if the hardware rewinds and replays GWS operations if a wave is
749 // preempted.
750 //
751 // If this is false, a GWS operation requires testing if a nack set the
752 // MEM_VIOL bit, and repeating if so.
753 bool hasGWSAutoReplay() const {
754 return getGeneration() >= GFX9;
755 }
756
757 /// \returns if target has ds_gws_sema_release_all instruction.
758 bool hasGWSSemaReleaseAll() const {
759 return CIInsts;
760 }
761
762 /// \returns true if the target has integer add/sub instructions that do not
763 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
764 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
765 /// for saturation.
766 bool hasAddNoCarry() const {
767 return AddNoCarryInsts;
768 }
769
770 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
771
772 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
773
774 bool hasUnpackedD16VMem() const {
775 return HasUnpackedD16VMem;
776 }
777
778 // Covers VS/PS/CS graphics shaders
779 bool isMesaGfxShader(const Function &F) const {
780 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
781 }
782
783 bool hasMad64_32() const {
784 return getGeneration() >= SEA_ISLANDS;
785 }
786
787 bool hasSDWAOmod() const {
788 return HasSDWAOmod;
789 }
790
791 bool hasSDWAScalar() const {
792 return HasSDWAScalar;
793 }
794
795 bool hasSDWASdst() const {
796 return HasSDWASdst;
797 }
798
799 bool hasSDWAMac() const {
800 return HasSDWAMac;
801 }
802
803 bool hasSDWAOutModsVOPC() const {
804 return HasSDWAOutModsVOPC;
805 }
806
807 bool hasDLInsts() const {
808 return HasDLInsts;
809 }
810
811 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
812
813 bool hasDot1Insts() const {
814 return HasDot1Insts;
815 }
816
817 bool hasDot2Insts() const {
818 return HasDot2Insts;
819 }
820
821 bool hasDot3Insts() const {
822 return HasDot3Insts;
823 }
824
825 bool hasDot4Insts() const {
826 return HasDot4Insts;
827 }
828
829 bool hasDot5Insts() const {
830 return HasDot5Insts;
831 }
832
833 bool hasDot6Insts() const {
834 return HasDot6Insts;
835 }
836
837 bool hasDot7Insts() const {
838 return HasDot7Insts;
839 }
840
841 bool hasDot8Insts() const {
842 return HasDot8Insts;
843 }
844
845 bool hasDot9Insts() const {
846 return HasDot9Insts;
847 }
848
849 bool hasDot10Insts() const {
850 return HasDot10Insts;
851 }
852
853 bool hasDot11Insts() const {
854 return HasDot11Insts;
855 }
856
857 bool hasDot12Insts() const {
858 return HasDot12Insts;
859 }
860
861 bool hasDot13Insts() const {
862 return HasDot13Insts;
863 }
864
865 bool hasMAIInsts() const {
866 return HasMAIInsts;
867 }
868
869 bool hasFP8Insts() const {
870 return HasFP8Insts;
871 }
872
874
875 bool hasCubeInsts() const { return HasCubeInsts; }
876
877 bool hasLerpInst() const { return HasLerpInst; }
878
879 bool hasSadInsts() const { return HasSadInsts; }
880
881 bool hasQsadInsts() const { return HasQsadInsts; }
882
883 bool hasCvtNormInsts() const { return HasCvtNormInsts; }
884
886
888
889 bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
890
891 bool hasPkFmacF16Inst() const {
892 return HasPkFmacF16Inst;
893 }
894
898
902
906
910
912
914
918
920
922
926
930
934
938
940
941 /// \return true if the target has flat, global, and buffer atomic fadd for
942 /// double.
946
947 /// \return true if the target's flat, global, and buffer atomic fadd for
948 /// float supports denormal handling.
952
953 /// \return true if atomic operations targeting fine-grained memory work
954 /// correctly at device scope, in allocations in host or peer PCIe device
955 /// memory.
959
960 /// \return true if HW emulates system scope atomics unsupported by the PCI-e
961 /// via CAS loop.
965
967
971
972 bool hasNoSdstCMPX() const {
973 return HasNoSdstCMPX;
974 }
975
976 bool hasVscnt() const {
977 return HasVscnt;
978 }
979
980 bool hasGetWaveIdInst() const {
981 return HasGetWaveIdInst;
982 }
983
984 bool hasSMemTimeInst() const {
985 return HasSMemTimeInst;
986 }
987
990 }
991
995
996 bool hasVOP3Literal() const {
997 return HasVOP3Literal;
998 }
999
1000 bool hasNoDataDepHazard() const {
1001 return HasNoDataDepHazard;
1002 }
1003
1005 return getGeneration() < SEA_ISLANDS;
1006 }
1007
1008 bool hasInstPrefetch() const {
1009 return getGeneration() == GFX10 || getGeneration() == GFX11;
1010 }
1011
1012 bool hasPrefetch() const { return GFX12Insts; }
1013
1014 bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }
1015
1017
1018 bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
1019
1020 // Has s_cmpk_* instructions.
1021 bool hasSCmpK() const { return getGeneration() < GFX12; }
1022
1023 // Scratch is allocated in 256 dword per wave blocks for the entire
1024 // wavefront. When viewed from the perspective of an arbitrary workitem, this
1025 // is 4-byte aligned.
1026 //
1027 // Only 4-byte alignment is really needed to access anything. Transformations
1028 // on the pointer value itself may rely on the alignment / known low bits of
1029 // the pointer. Set this to something above the minimum to avoid needing
1030 // dynamic realignment in common cases.
1031 Align getStackAlignment() const { return Align(16); }
1032
1033 bool enableMachineScheduler() const override {
1034 return true;
1035 }
1036
1037 bool useAA() const override;
1038
1039 bool enableSubRegLiveness() const override {
1040 return true;
1041 }
1042
1045
1046 // static wrappers
1047 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
1048
1049 // XXX - Why is this here if it isn't in the default pass set?
1050 bool enableEarlyIfConversion() const override {
1051 return true;
1052 }
1053
1055 const SchedRegion &Region) const override;
1056
1058 const SchedRegion &Region) const override;
1059
1060 void mirFileLoaded(MachineFunction &MF) const override;
1061
1062 unsigned getMaxNumUserSGPRs() const {
1063 return AMDGPU::getMaxNumUserSGPRs(*this);
1064 }
1065
1066 bool hasSMemRealTime() const {
1067 return HasSMemRealTime;
1068 }
1069
1070 bool hasMovrel() const {
1071 return HasMovrel;
1072 }
1073
1074 bool hasVGPRIndexMode() const {
1075 return HasVGPRIndexMode;
1076 }
1077
1078 bool useVGPRIndexMode() const;
1079
1081 return getGeneration() >= VOLCANIC_ISLANDS;
1082 }
1083
1085
1086 bool hasScalarStores() const {
1087 return HasScalarStores;
1088 }
1089
1090 bool hasScalarAtomics() const {
1091 return HasScalarAtomics;
1092 }
1093
1094 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1096
1097 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1098 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1099
1100 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1101 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1102
1103 bool hasDPP() const {
1104 return HasDPP;
1105 }
1106
1107 bool hasDPPBroadcasts() const {
1108 return HasDPP && getGeneration() < GFX10;
1109 }
1110
1112 return HasDPP && getGeneration() < GFX10;
1113 }
1114
1115 bool hasDPP8() const {
1116 return HasDPP8;
1117 }
1118
1119 bool hasDPALU_DPP() const {
1120 return HasDPALU_DPP;
1121 }
1122
1123 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1124
1125 bool hasPackedFP32Ops() const {
1126 return HasPackedFP32Ops;
1127 }
1128
1129 // Has V_PK_MOV_B32 opcode
1130 bool hasPkMovB32() const {
1131 return GFX90AInsts;
1132 }
1133
1135 return getGeneration() >= GFX10 || hasGFX940Insts();
1136 }
1137
1138 bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
1139
1140 bool hasImageInsts() const {
1141 return HasImageInsts;
1142 }
1143
1145 return HasExtendedImageInsts;
1146 }
1147
1148 bool hasR128A16() const {
1149 return HasR128A16;
1150 }
1151
1152 bool hasA16() const { return HasA16; }
1153
1154 bool hasG16() const { return HasG16; }
1155
1156 bool hasOffset3fBug() const {
1157 return HasOffset3fBug;
1158 }
1159
1161
1163
1164 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1165
1167
1169
1170 bool hasNSAEncoding() const { return HasNSAEncoding; }
1171
1172 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1173
1175
1176 unsigned getNSAMaxSize(bool HasSampler = false) const {
1177 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1178 }
1179
1180 bool hasGFX10_AEncoding() const {
1181 return GFX10_AEncoding;
1182 }
1183
1184 bool hasGFX10_BEncoding() const {
1185 return GFX10_BEncoding;
1186 }
1187
1188 bool hasGFX10_3Insts() const {
1189 return GFX10_3Insts;
1190 }
1191
1192 bool hasMadF16() const;
1193
1194 bool hasMovB64() const { return GFX940Insts || GFX1250Insts; }
1195
1196 bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }
1197
1198 // Scalar and global loads support scale_offset bit.
1199 bool hasScaleOffset() const { return GFX1250Insts; }
1200
1201 bool hasFlatGVSMode() const { return FlatGVSMode; }
1202
1203 // FLAT GLOBAL VOffset is signed
1204 bool hasSignedGVSOffset() const { return GFX1250Insts; }
1205
1206 bool enableSIScheduler() const {
1207 return EnableSIScheduler;
1208 }
1209
1210 bool loadStoreOptEnabled() const {
1211 return EnableLoadStoreOpt;
1212 }
1213
1214 bool hasSGPRInitBug() const {
1215 return SGPRInitBug;
1216 }
1217
1219 return UserSGPRInit16Bug && isWave32();
1220 }
1221
1223
1227
1230 }
1231
1235
1236 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1238 return CIInsts;
1239 }
1240
1243 }
1244
1249
1252 }
1253
1256 }
1257
1260 }
1261
1264 }
1265
1268 }
1269
1270 bool hasLDSMisalignedBug() const {
1271 return LDSMisalignedBug && !EnableCuMode;
1272 }
1273
1275 return HasInstFwdPrefetchBug;
1276 }
1277
1279 return HasVcmpxExecWARHazard;
1280 }
1281
1284 }
1285
1286 // Shift amount of a 64 bit shift cannot be the highest allocated register
1287 // if also at the end of the allocation block.
1289 return GFX90AInsts && !GFX940Insts;
1290 }
1291
1292 // Has one cycle hazard on transcendental instruction feeding a
1293 // non transcendental VALU.
1294 bool hasTransForwardingHazard() const { return GFX940Insts; }
1295
1296 // Has one cycle hazard on a VALU instruction partially writing dst with
1297 // a shift of result bits feeding another VALU instruction.
1299
1300 // Cannot use op_sel with v_dot instructions.
1301 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1302
1303 // Does not have HW interlocks for VALU writing and then reading SGPRs.
1304 bool hasVDecCoExecHazard() const {
1305 return GFX940Insts;
1306 }
1307
1308 bool hasNSAtoVMEMBug() const {
1309 return HasNSAtoVMEMBug;
1310 }
1311
1312 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1313
1314 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1315
1316 bool hasGFX90AInsts() const { return GFX90AInsts; }
1317
1319 return getGeneration() == GFX10;
1320 }
1321
1322 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1323
1324 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1325
1326 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1327
1329 return getGeneration() == GFX11;
1330 }
1331
1333
1335
1336 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1337
1339
1343
1344 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1345
1346 bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
1347
1349 return GFX1250Insts && getGeneration() == GFX12;
1350 }
1351
1352 /// Return if operations acting on VGPR tuples require even alignment.
1353 bool needsAlignedVGPRs() const { return RequiresAlignVGPR; }
1354
1355 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1356 bool hasSPackHL() const { return GFX11Insts; }
1357
1358 /// Return true if the target's EXP instruction has the COMPR flag, which
1359 /// affects the meaning of the EN (enable) bits.
1360 bool hasCompressedExport() const { return !GFX11Insts; }
1361
1362 /// Return true if the target's EXP instruction supports the NULL export
1363 /// target.
1364 bool hasNullExportTarget() const { return !GFX11Insts; }
1365
1366 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1367
1368 bool hasVOPDInsts() const { return HasVOPDInsts; }
1369
1371
1372 /// Return true if the target has the S_DELAY_ALU instruction.
1373 bool hasDelayAlu() const { return GFX11Insts; }
1374
1375 bool hasPackedTID() const { return HasPackedTID; }
1376
1377 // GFX94* is derived from GFX90A. hasGFX940Insts() being true implies that
1378 // hasGFX90AInsts is also true.
1379 bool hasGFX940Insts() const { return GFX940Insts; }
1380
1381 // GFX950 is derived from GFX94*. hasGFX950Insts() implies that
1382 // hasGFX940Insts and hasGFX90AInsts are also true.
1383 bool hasGFX950Insts() const { return GFX950Insts; }
1384
1385 /// Returns true if the target supports
1386 /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
1387 /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
1388 bool hasLDSLoadB96_B128() const {
1389 return hasGFX950Insts();
1390 }
1391
1392 bool hasVMemToLDSLoad() const { return HasVMemToLDSLoad; }
1393
1394 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1395
1397
1399
1401
1403
1404 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1405 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1406 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1407
1408 /// \returns true if inline constants are not supported for F16 pseudo
1409 /// scalar transcendentals.
1411 return getGeneration() == GFX12;
1412 }
1413
1414 /// \returns true if the target has instructions with xf32 format support.
1415 bool hasXF32Insts() const { return HasXF32Insts; }
1416
1417 /// \returns true if the target has packed f32 instructions that only read 32
1418 /// bits from a scalar operand (SGPR or literal) and replicates the bits to
1419 /// both channels.
1423
1424 bool hasBitOp3Insts() const { return HasBitOp3Insts; }
1425
1426 bool hasPermlane16Swap() const { return HasPermlane16Swap; }
1427 bool hasPermlane32Swap() const { return HasPermlane32Swap; }
1428 bool hasAshrPkInsts() const { return HasAshrPkInsts; }
1429
1432 }
1433
1436 }
1437
1438 bool hasMin3Max3PKF16() const { return HasMin3Max3PKF16; }
1439
1440 bool hasTanhInsts() const { return HasTanhInsts; }
1441
1443
1444 bool hasAddPC64Inst() const { return GFX1250Insts; }
1445
1447
1450 }
1451
1453
1454 /// \returns true if the target has s_wait_xcnt insertion. Supported for
1455 /// GFX1250.
1456 bool hasWaitXCnt() const { return HasWaitXcnt; }
1457
1458 // A single-DWORD instruction can use a 64-bit literal.
1459 bool has64BitLiterals() const { return Has64BitLiterals; }
1460
1462
1464
1465 /// \returns The maximum number of instructions that can be enclosed in an
1466 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1467 /// instruction.
1468 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1469
1470 bool hasPrngInst() const { return HasPrngInst; }
1471
1473
1474 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1475 /// SGPRs
1476 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1477
1478 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1479 /// VGPRs
1480 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,
1481 unsigned DynamicVGPRBlockSize) const;
1482
1483 /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
1484 /// be achieved when the only function running on a CU is \p F, each workgroup
1485 /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
1486 /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a
1487 /// range, so this returns a range as well.
1488 ///
1489 /// Note that occupancy can be affected by the scratch allocation as well, but
1490 /// we do not have enough information to compute it.
1491 std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
1492 unsigned LDSSize = 0,
1493 unsigned NumSGPRs = 0,
1494 unsigned NumVGPRs = 0) const;
1495
1496 /// \returns true if the flat_scratch register should be initialized with the
1497 /// pointer to the wave's scratch memory rather than a size and offset.
1500 }
1501
1502 /// \returns true if the flat_scratch register is initialized by the HW.
1503 /// In this case it is readonly.
1505
1506 /// \returns true if the architected SGPRs are enabled.
1508
1509 /// \returns true if Global Data Share is supported.
1510 bool hasGDS() const { return HasGDS; }
1511
1512 /// \returns true if Global Wave Sync is supported.
1513 bool hasGWS() const { return HasGWS; }
1514
1515 /// \returns true if the machine has merged shaders in which s0-s7 are
1516 /// reserved by the hardware and user SGPRs start at s8
1517 bool hasMergedShaders() const {
1518 return getGeneration() >= GFX9;
1519 }
1520
1521 /// \returns true if the target supports the pre-NGG legacy geometry path
     /// (generations before GFX11).
1522 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1523
1524 /// \returns true if preloading kernel arguments is supported.
1525 bool hasKernargPreload() const { return KernargPreload; }
1526
1527 /// \returns true if the target has the split barriers feature (GFX12 and
     /// later generations).
1528 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1529
1530 /// \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1531 bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
1532
1533 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1534 // no-return form.
1536
1537 /// \returns true if the target has the DX10_CLAMP kernel descriptor mode bit
     /// (generations before GFX12).
1538 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1539
1540 /// \returns true if the target has the IEEE kernel descriptor mode bit
     /// (generations before GFX12).
1541 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1542
1543 // \returns true if the target has IEEE fminimum/fmaximum instructions
1545
1546 /// \returns true if the target has the WG_RR_MODE kernel descriptor mode bit
     /// (GFX12 and later generations).
1547 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1548
1549 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1550 /// values.
1551 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1552
     /// \returns true if the GFX1250Insts subtarget feature is set.
1553 bool hasGFX1250Insts() const { return GFX1250Insts; }
1554
     /// \returns true if VOPD3 is available; this is reported exactly when
     /// GFX1250Insts is set.
1555 bool hasVOPD3() const { return GFX1250Insts; }
1556
1557 /// \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.
1558 bool hasAddSubU64Insts() const { return HasAddSubU64Insts; }
1559
1560 /// \returns true if the target has V_MAD_U32 instruction.
1561 bool hasMadU32Inst() const { return HasMadU32Inst; }
1562
1563 /// \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
1564 bool hasVectorMulU64() const { return GFX1250Insts; }
1565
1566 /// \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32
1567 /// instructions.
1568 bool hasMadU64U32NoCarry() const { return GFX1250Insts; }
1569
1570 /// \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
1571 bool hasIntMinMax64() const { return GFX1250Insts; }
1572
1573 /// \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
1574 bool hasAddMinMaxInsts() const { return HasAddMinMaxInsts; }
1575
1576 // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
1578
1579 /// \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
1580 bool hasPkMinMax3Insts() const { return GFX1250Insts; }
1581
1582 /// \returns true if the target has S_GET_SHADER_CYCLES_U64 instruction.
1583 bool hasSGetShaderCyclesInst() const { return GFX1250Insts; }
1584
1585 // \returns true if target has S_SETPRIO_INC_WG instruction.
1587
1588 /// \returns true if the target has S_WAKEUP_BARRIER instruction.
1589 bool hasSWakeupBarrier() const { return HasSWakeupBarrier; }
1590
1591 /// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1592 /// of sign-extending. Note that GFX1250 has not only fixed the bug but also
1593 /// extended VA to 57 bits.
1594 bool hasGetPCZeroExtension() const { return GFX12Insts && !GFX1250Insts; }
1595
1596 // \returns true if the target needs to create a prolog for backward
1597 // compatibility when preloading kernel arguments.
1599 return hasKernargPreload() && !GFX1250Insts;
1600 }
1601
     /// \returns true if the conditional-subtract instructions are available;
     /// this is reported exactly when GFX12Insts is set.
1602 bool hasCondSubInsts() const { return GFX12Insts; }
1603
     /// \returns true if the subtract-with-clamp instructions are available;
     /// this is reported exactly when hasGFX10_3Insts() is true.
1604 bool hasSubClampInsts() const { return hasGFX10_3Insts(); }
1605
1606 /// \returns SGPR allocation granularity supported by the subtarget.
1607 unsigned getSGPRAllocGranule() const {
1609 }
1610
1611 /// \returns SGPR encoding granularity supported by the subtarget.
1612 unsigned getSGPREncodingGranule() const {
1614 }
1615
1616 /// \returns Total number of SGPRs supported by the subtarget.
1617 unsigned getTotalNumSGPRs() const {
1619 }
1620
1621 /// \returns Addressable number of SGPRs supported by the subtarget.
1622 unsigned getAddressableNumSGPRs() const {
1624 }
1625
1626 /// \returns Minimum number of SGPRs that meets the given number of waves per
1627 /// execution unit requirement supported by the subtarget.
1628 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1629 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1630 }
1631
1632 /// \returns Maximum number of SGPRs that meets the given number of waves per
1633 /// execution unit requirement supported by the subtarget.
1634 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1635 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1636 }
1637
1638 /// \returns Reserved number of SGPRs. This is common
1639 /// utility function called by MachineFunction and
1640 /// Function variants of getReservedNumSGPRs.
1641 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1642 /// \returns Reserved number of SGPRs for given machine function \p MF.
1643 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1644
1645 /// \returns Reserved number of SGPRs for given function \p F.
1646 unsigned getReservedNumSGPRs(const Function &F) const;
1647
1648 /// \returns Maximum number of preloaded SGPRs for the subtarget.
1649 unsigned getMaxNumPreloadedSGPRs() const;
1650
1651 /// \returns max num SGPRs. This is the common utility
1652 /// function called by MachineFunction and Function
1653 /// variants of getMaxNumSGPRs.
1654 unsigned getBaseMaxNumSGPRs(const Function &F,
1655 std::pair<unsigned, unsigned> WavesPerEU,
1656 unsigned PreloadedSGPRs,
1657 unsigned ReservedNumSGPRs) const;
1658
1659 /// \returns Maximum number of SGPRs that meets number of waves per execution
1660 /// unit requirement for function \p MF, or number of SGPRs explicitly
1661 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1662 ///
1663 /// \returns Value that meets number of waves per execution unit requirement
1664 /// if explicitly requested value cannot be converted to integer, violates
1665 /// subtarget's specifications, or does not meet number of waves per execution
1666 /// unit requirement.
1667 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1668
1669 /// \returns Maximum number of SGPRs that meets number of waves per execution
1670 /// unit requirement for function \p F, or number of SGPRs explicitly
1671 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1672 ///
1673 /// \returns Value that meets number of waves per execution unit requirement
1674 /// if explicitly requested value cannot be converted to integer, violates
1675 /// subtarget's specifications, or does not meet number of waves per execution
1676 /// unit requirement.
1677 unsigned getMaxNumSGPRs(const Function &F) const;
1678
1679 /// \returns VGPR allocation granularity supported by the subtarget.
1680 unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {
1681 return AMDGPU::IsaInfo::getVGPRAllocGranule(this, DynamicVGPRBlockSize);
1682 }
1683
1684 /// \returns VGPR encoding granularity supported by the subtarget.
1685 unsigned getVGPREncodingGranule() const {
1687 }
1688
1689 /// \returns Total number of VGPRs supported by the subtarget.
1690 unsigned getTotalNumVGPRs() const {
1692 }
1693
1694 /// \returns Addressable number of architectural VGPRs supported by the
1695 /// subtarget.
1699
1700 /// \returns Addressable number of VGPRs supported by the subtarget.
1701 unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
1702 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
1703 }
1704
1705 /// \returns the minimum number of VGPRs that will prevent achieving more than
1706 /// the specified number of waves \p WavesPerEU.
1707 unsigned getMinNumVGPRs(unsigned WavesPerEU,
1708 unsigned DynamicVGPRBlockSize) const {
1709 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU,
1710 DynamicVGPRBlockSize);
1711 }
1712
1713 /// \returns the maximum number of VGPRs that can be used and still achieve
1714 /// at least the specified number of waves \p WavesPerEU.
1715 unsigned getMaxNumVGPRs(unsigned WavesPerEU,
1716 unsigned DynamicVGPRBlockSize) const {
1717 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU,
1718 DynamicVGPRBlockSize);
1719 }
1720
1721 /// \returns max num VGPRs. This is the common utility function
1722 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1723 unsigned
1725 std::pair<unsigned, unsigned> NumVGPRBounds) const;
1726
1727 /// \returns Maximum number of VGPRs that meets number of waves per execution
1728 /// unit requirement for function \p F, or number of VGPRs explicitly
1729 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1730 ///
1731 /// \returns Value that meets number of waves per execution unit requirement
1732 /// if explicitly requested value cannot be converted to integer, violates
1733 /// subtarget's specifications, or does not meet number of waves per execution
1734 /// unit requirement.
1735 unsigned getMaxNumVGPRs(const Function &F) const;
1736
     /// \returns Maximum number of AGPRs for function \p F. This forwards to
     /// getMaxNumVGPRs(\p F), so the AGPR limit equals the VGPR limit.
1737 unsigned getMaxNumAGPRs(const Function &F) const {
1738 return getMaxNumVGPRs(F);
1739 }
1740
1741 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
1742 /// of waves per execution unit required for the function \p F.
1743 std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
1744
1745 /// \returns Maximum number of VGPRs that meets number of waves per execution
1746 /// unit requirement for function \p MF, or number of VGPRs explicitly
1747 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1748 ///
1749 /// \returns Value that meets number of waves per execution unit requirement
1750 /// if explicitly requested value cannot be converted to integer, violates
1751 /// subtarget's specifications, or does not meet number of waves per execution
1752 /// unit requirement.
1753 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1754
     /// \returns true if wave32 is supported, i.e. the generation is GFX10 or
     /// later.
1755 bool supportsWave32() const { return getGeneration() >= GFX10; }
1756
     /// \returns true if wave64 is supported, i.e. the subtarget does not have
     /// the GFX1250 instructions.
1757 bool supportsWave64() const { return !hasGFX1250Insts(); }
1758
     /// \returns true if the wavefront size of this subtarget is 32.
1759 bool isWave32() const {
1760 return getWavefrontSize() == 32;
1761 }
1762
     /// \returns true if the wavefront size of this subtarget is 64.
1763 bool isWave64() const {
1764 return getWavefrontSize() == 64;
1765 }
1766
1767 /// \returns true if the wave size of this subtarget is reliably known. This
1768 /// is false only for a default target-cpu that does not have an explicit
1769 /// +wavefrontsize target feature.
1770 bool isWaveSizeKnown() const {
1771 return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
1772 hasFeature(AMDGPU::FeatureWavefrontSize64);
1773 }
1774
1776 return getRegisterInfo()->getBoolRC();
1777 }
1778
1779 /// \returns Maximum number of work groups per compute unit supported by the
1780 /// subtarget and limited by given \p FlatWorkGroupSize.
1781 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1782 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1783 }
1784
1785 /// \returns Minimum flat work group size supported by the subtarget.
1786 unsigned getMinFlatWorkGroupSize() const override {
1788 }
1789
1790 /// \returns Maximum flat work group size supported by the subtarget.
1791 unsigned getMaxFlatWorkGroupSize() const override {
1793 }
1794
1795 /// \returns Number of waves per execution unit required to support the given
1796 /// \p FlatWorkGroupSize.
1797 unsigned
1798 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1799 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1800 }
1801
1802 /// \returns Minimum number of waves per execution unit supported by the
1803 /// subtarget.
1804 unsigned getMinWavesPerEU() const override {
1806 }
1807
1808 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1809 SDep &Dep,
1810 const TargetSchedModel *SchedModel) const override;
1811
1812 /// \returns true if it's beneficial on this subtarget for the scheduler to
1813 /// cluster stores as well as loads.
1814 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1815
1816 /// \returns the number of address arguments from which to enable MIMG NSA
1817 /// on supported architectures.
1818 unsigned getNSAThreshold(const MachineFunction &MF) const;
1819
1820 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1821 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1823
1824 // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
1825 // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
1827
     /// \returns true if the DynamicVGPR flag is set for this subtarget.
1828 bool isDynamicVGPREnabled() const { return DynamicVGPR; }
1829 unsigned getDynamicVGPRBlockSize() const {
     // The block size is 32 when DynamicVGPRBlockSize32 is set, 16 otherwise.
1830 return DynamicVGPRBlockSize32 ? 32 : 16;
1831 }
1832
1834 // AMDGPU doesn't care if early-clobber and undef operands are allocated
1835 // to the same register.
1836 return false;
1837 }
1838
1839 // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
1840 // and surrounded by S_WAIT_ALU(0xFFE3).
1842 return getGeneration() == GFX12;
1843 }
1844
1845 // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
1846 // read.
1848 return GFX1250Insts && getGeneration() == GFX12;
1849 }
1850
1851 // src_flat_scratch_hi cannot be used as a source in SALU producing a 64-bit
1852 // result.
1854 return GFX1250Insts && getGeneration() == GFX12;
1855 }
1856
1857 /// \returns true if the subtarget supports clusters of workgroups.
1858 bool hasClusters() const { return HasClusters; }
1859
1860 /// \returns true if the subtarget requires a wait for xcnt before VMEM
1861 /// accesses that must never be repeated in the event of a page fault/re-try.
1862 /// Atomic stores/rmw and all volatile accesses fall under these criteria.
1866
1867 /// \returns the number of significant bits in the immediate field of the
1868 /// S_NOP instruction.
1869 unsigned getSNopBits() const {
1871 return 7;
1873 return 4;
1874 return 3;
1875 }
1876
1877 /// \returns true if the sub-target supports buffer resource (V#) with 45-bit
1878 /// num_records.
1882
1886};
1887
1889public:
1890 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1891
1892 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1893
1894 bool hasDispatchPtr() const { return DispatchPtr; }
1895
1896 bool hasQueuePtr() const { return QueuePtr; }
1897
1898 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1899
1900 bool hasDispatchID() const { return DispatchID; }
1901
1902 bool hasFlatScratchInit() const { return FlatScratchInit; }
1903
1904 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1905
1906 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1907
1908 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1909
1910 unsigned getNumFreeUserSGPRs();
1911
1912 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1913
1924
1925 // Returns the size in number of SGPRs for preload user SGPR field.
1927 switch (ID) {
1929 return 2;
1931 return 4;
1932 case DispatchPtrID:
1933 return 2;
1934 case QueuePtrID:
1935 return 2;
1937 return 2;
1938 case DispatchIdID:
1939 return 2;
1940 case FlatScratchInitID:
1941 return 2;
1943 return 1;
1944 }
1945 llvm_unreachable("Unknown UserSGPRID.");
1946 }
1947
1948 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1949
1950private:
1951 const GCNSubtarget &ST;
1952
1953 // Private memory buffer
1954 // Compute directly in sgpr[0:1]
1955 // Other shaders indirect 64-bits at sgpr[0:1]
1956 bool ImplicitBufferPtr = false;
1957
1958 bool PrivateSegmentBuffer = false;
1959
1960 bool DispatchPtr = false;
1961
1962 bool QueuePtr = false;
1963
1964 bool KernargSegmentPtr = false;
1965
1966 bool DispatchID = false;
1967
1968 bool FlatScratchInit = false;
1969
1970 bool PrivateSegmentSize = false;
1971
1972 unsigned NumKernargPreloadSGPRs = 0;
1973
1974 unsigned NumUsedUserSGPRs = 0;
1975};
1976
1977} // end namespace llvm
1978
1979#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
#define F(x, y, z)
Definition MD5.cpp:54
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
bool hasMemoryAtomicFaddF32DenormalSupport() const
bool hasFlat() const
bool hasD16Images() const
bool hasMinimum3Maximum3F32() const
InstrItineraryData InstrItins
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
bool hasSDWAOmod() const
bool hasFlatGVSMode() const
bool hasPermlane32Swap() const
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
bool hasSwap() const
bool hasPkFmacF16Inst() const
bool HasAtomicFMinFMaxF64FlatInsts
bool hasPkMinMax3Insts() const
bool hasDot2Insts() const
bool hasD16LoadStore() const
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
bool hasScalarCompareEq64() const
bool has1_5xVGPRs() const
int getLDSBankCount() const
bool hasSafeCUPrefetch() const
bool hasOnlyRevVALUShifts() const
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
bool hasLdsAtomicAddF64() const
bool hasFlatLgkmVMemCountInOrder() const
bool Has45BitNumRecordsBufferResource
bool flatScratchIsPointer() const
bool hasSDWAMac() const
bool hasFP8ConversionInsts() const
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
bool hasApertureRegs() const
unsigned MaxPrivateElementSize
bool unsafeDSOffsetFoldingEnabled() const
bool hasBitOp3Insts() const
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
bool vmemWriteNeedsExpWaitcnt() const
bool hasAtomicFMinFMaxF32FlatInsts() const
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
bool hasIEEEMinimumMaximumInsts() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
bool hasGetWaveIdInst() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasFlatScratchHiInB64InstHazard() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
bool hasRelaxedBufferOOBMode() const
bool hasPkAddMinMaxInsts() const
bool hasDLInsts() const
bool hasExtendedImageInsts() const
bool hasVmemWriteVgprInOrder() const
unsigned getSNopBits() const
bool hasMAIInsts() const
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
bool has1024AddressableVGPRs() const
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
bool hasFlatScratchInsts() const
bool hasMultiDwordFlatScratchAddressing() const
bool hasArchitectedSGPRs() const
bool hasFmaakFmamkF64Insts() const
bool hasTanhInsts() const
bool hasHWFP64() const
bool hasScaleOffset() const
bool hasDenormModeInst() const
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
bool hasCvtScaleForwardingHazard() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
bool hasUnalignedDSAccessEnabled() const
bool hasTensorCvtLutInsts() const
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
bool hasSWakeupBarrier() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasDot1Insts() const
bool hasDot3Insts() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
bool hasCondSubInsts() const
const InlineAsmLowering * getInlineAsmLowering() const override
bool hasAutoWaitcntBeforeBarrier() const
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
unsigned getTotalNumSGPRs() const
bool hasGFX1250Insts() const
const InstrItineraryData * getInstrItineraryData() const override
bool hasSafeSmemPrefetch() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool HasShaderCyclesHiLoRegisters
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
bool privateMemoryResourceIsRangeChecked() const
bool hasScalarSubwordLoads() const
bool hasDot11Insts() const
bool enableFlatScratch() const
bool hasMadF16() const
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const
bool hasMin3Max3PKF16() const
bool hasUnalignedBufferAccess() const
bool hasR128A16() const
bool hasCvtPkNormVOP3Insts() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasPrngInst() const
bool hasSignedScratchOffsets() const
bool hasGlobalAddTidInsts() const
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
bool hasFmaMixBF16Insts() const
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool hasVMemToLDSLoad() const
bool supportsGetDoorbellID() const
bool supportsWave32() const
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
bool hasFlatAtomicFaddF32Inst() const
bool hasKernargPreload() const
bool hasFP8Insts() const
unsigned getMaxNumAGPRs(const Function &F) const
bool hasReadM0MovRelInterpHazard() const
bool isDynamicVGPREnabled() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
bool hasFmaakFmamkF32Insts() const
bool hasClusters() const
bool hasVscnt() const
bool hasMad64_32() const
InstructionSelector * getInstructionSelector() const override
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
bool hasHardClauses() const
bool useDS128() const
bool hasExtendedWaitCounts() const
bool hasBVHDualAndBVH8Insts() const
bool hasMinimum3Maximum3PKF16() const
bool hasLshlAddU64Inst() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
bool hasFmacF64Inst() const
bool RequiresWaitsBeforeSystemScopeStores
bool hasXF32Insts() const
bool hasInstPrefetch() const
bool hasAddPC64Inst() const
unsigned maxHardClauseLength() const
bool hasAshrPkInsts() const
bool isMesaGfxShader(const Function &F) const
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
bool hasDPP() const
bool hasVINTERPEncoding() const
bool hasGloballyAddressableScratch() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasAddSubU64Insts() const
bool hasLegacyGeometry() const
bool has64BitLiterals() const
TrapHandlerAbi getTrapHandlerAbi() const
bool isCuModeEnabled() const
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
bool hasUnalignedScratchAccess() const
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasMinimum3Maximum3F16() const
bool hasSDWAOutModsVOPC() const
bool hasAtomicFMinFMaxF32GlobalInsts() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
bool hasLdsBarrierArriveAtomic() const
bool hasGFX950Insts() const
bool hasCvtNormInsts() const
bool has45BitNumRecordsBufferResource() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
unsigned getMaxNumPreloadedSGPRs() const
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
bool hasNoDataDepHazard() const
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool useVGPRBlockOpsForCSR() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool hasUnalignedDSAccess() const
bool hasAddMinMaxInsts() const
bool needsKernArgPreloadProlog() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
bool hasIntClamp() const
bool hasGFX10_AEncoding() const
bool hasFP8E5M3Insts() const
bool hasFlatSegmentOffsetBug() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const
bool hasEmulatedSystemScopeAtomics() const
bool hasMadU64U32NoCarry() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
const SITargetLowering * getTargetLowering() const override
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
bool hasGFX940Insts() const
bool hasFullRate64Ops() const
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
bool enableMachineScheduler() const override
bool hasLDSFPAtomicAddF64() const
bool hasFlatGlobalInsts() const
bool HasGloballyAddressableScratch
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
bool getScalarizeGlobalBehavior() const
bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const
bool HasAtomicFMinFMaxF32FlatInsts
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasScratchBaseForwardingHazard() const
bool hasLerpInst() const
bool hasIntMinMax64() const
bool hasShaderCyclesHiLoRegisters() const
bool hasSDWASdst() const
bool HasDefaultComponentBroadcast
bool hasScalarPackInsts() const
bool hasNSAEncoding() const
bool requiresDisjointEarlyClobberAndUndef() const override
bool hasVALUReadSGPRHazard() const
bool hasSMemRealTime() const
bool hasFlatAddressSpace() const
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
bool hasMovB64() const
bool hasVmemPrefInsts() const
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const
bool hasCubeInsts() const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
bool hasMed3_16() const
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasUnalignedScratchAccessEnabled() const
bool hasMovrel() const
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
bool hasDot13Insts() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
bool HasAtomicBufferGlobalPkAddF16Insts
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
bool hasUnalignedBufferAccessEnabled() const
bool isWaveSizeKnown() const
Returns if the wavesize of this subtarget is known reliable.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool hasFMA() const
bool hasDot10Insts() const
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
bool supportsWave64() const
bool HasAtomicBufferPkAddBF16Inst
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFormattedMUBUFInsts() const
bool hasFlatScratchSVSMode() const
bool supportsWGP() const
bool hasG16() const
bool hasHalfRate64Ops() const
bool hasAtomicFaddInsts() const
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
bool hasSubClampInsts() const
bool hasPermlane16Swap() const
bool hasNSAtoVMEMBug() const
bool requiresWaitXCntForSingleAccessInstructions() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
bool hasSadInsts() const
bool hasMIMG_R128() const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
unsigned getMaxNumUserSGPRs() const
bool hasTransposeLoadF4F6Insts() const
bool hasMadU32Inst() const
bool hasAtomicFaddNoRtnInsts() const
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
bool HasEmulatedSystemScopeAtomics
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasSignedGVSOffset() const
bool hasCvtPkNormVOP2Insts() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
bool hasVectorMulU64() const
bool hasScalarMulHiInsts() const
bool hasSCmpK() const
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
bool requiresWaitIdleBeforeGetReg() const
bool hasPointSampleAccel() const
bool hasDot12Insts() const
bool hasDS96AndDS128() const
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Generation getGeneration() const
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
bool hasAtomicBufferGlobalPkAddF16Insts() const
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit ...
bool hasNoSdstCMPX() const
bool isXNACKEnabled() const
bool hasScalarAddSub64() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
bool enableEarlyIfConversion() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by an SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
bool hasSGetShaderCyclesInst() const
bool hasRFEHazards() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
bool hasFlatScratchSTMode() const
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
bool hasAddr64() const
bool HasAtomicGlobalPkAddBF16Inst
bool hasUnalignedAccessMode() const
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool isWave64() const
unsigned getDynamicVGPRBlockSize() const
bool hasFmaMixInsts() const
bool hasQsadInsts() const
bool hasPackedTID() const
bool setRegModeNeedsVNOPs() const
bool hasFP64() const
bool hasAddNoCarry() const
bool requiresWaitsBeforeSystemScopeStores() const
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
bool hasSALUFloatInsts() const
bool EnableUnsafeDSOffsetFolding
bool hasFractBug() const
bool isPreciseMemoryEnabled() const
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
bool HasMemoryAtomicFaddF32DenormalSupport
bool hasMTBUFInsts() const
bool hasDot4Insts() const
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
bool hasWaitXCnt() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
bool hasSetPrioIncWgInst() const
~GCNSubtarget() override
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasDot9Insts() const
bool hasVOPD3() const
bool hasAtomicCSub() const
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
bool hasDefaultComponentBroadcast() const
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
bool HasFlatBufferGlobalAtomicFaddF64Inst
static unsigned getNumUserSGPRForField(UserSGPRID ID)
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
unsigned getNumKernargPreloadSGPRs() const
unsigned getNumUsedUserSGPRs() const
GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST)
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition ScheduleDAG.h:51
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition bit.h:236
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
A region of an MBB for scheduling.