LLVM 19.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
26
27#define GET_SUBTARGETINFO_HEADER
28#include "AMDGPUGenSubtargetInfo.inc"
29
30namespace llvm {
31
32class GCNTargetMachine;
33
35 public AMDGPUSubtarget {
36public:
38
39 // Following 2 enums are documented at:
40 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
41 enum class TrapHandlerAbi {
42 NONE = 0x00,
43 AMDHSA = 0x01,
44 };
45
46 enum class TrapID {
47 LLVMAMDHSATrap = 0x02,
49 };
50
51private:
52 /// GlobalISel related APIs.
53 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
54 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
55 std::unique_ptr<InstructionSelector> InstSelector;
56 std::unique_ptr<LegalizerInfo> Legalizer;
57 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
58
59protected:
60 // Basic subtarget description.
63 unsigned Gen = INVALID;
65 int LDSBankCount = 0;
67
68 // Possibly statically set by tablegen, but may want to be overridden.
69 bool FastDenormalF32 = false;
70 bool HalfRate64Ops = false;
71 bool FullRate64Ops = false;
72
73 // Dynamically set bits that enable features.
74 bool FlatForGlobal = false;
76 bool BackOffBarrier = false;
78 bool UnalignedAccessMode = false;
79 bool HasApertureRegs = false;
80 bool SupportsXNACK = false;
81 bool KernargPreload = false;
82
83 // This should not be used directly. 'TargetID' tracks the dynamic settings
84 // for XNACK.
85 bool EnableXNACK = false;
86
87 bool EnableTgSplit = false;
88 bool EnableCuMode = false;
89 bool TrapHandler = false;
90
91 // Used as options.
92 bool EnableLoadStoreOpt = false;
94 bool EnableSIScheduler = false;
95 bool EnableDS128 = false;
96 bool EnablePRTStrictNull = false;
97 bool DumpCode = false;
98
99 // Static subtarget properties set by tablegen.
100 bool FP64 = false;
101 bool FMA = false;
102 bool MIMG_R128 = false;
103 bool CIInsts = false;
104 bool GFX8Insts = false;
105 bool GFX9Insts = false;
106 bool GFX90AInsts = false;
107 bool GFX940Insts = false;
108 bool GFX10Insts = false;
109 bool GFX11Insts = false;
110 bool GFX12Insts = false;
111 bool GFX10_3Insts = false;
112 bool GFX7GFX8GFX9Insts = false;
113 bool SGPRInitBug = false;
114 bool UserSGPRInit16Bug = false;
117 bool HasSMemRealTime = false;
118 bool HasIntClamp = false;
119 bool HasFmaMixInsts = false;
120 bool HasMovrel = false;
121 bool HasVGPRIndexMode = false;
123 bool HasScalarStores = false;
124 bool HasScalarAtomics = false;
125 bool HasSDWAOmod = false;
126 bool HasSDWAScalar = false;
127 bool HasSDWASdst = false;
128 bool HasSDWAMac = false;
129 bool HasSDWAOutModsVOPC = false;
130 bool HasDPP = false;
131 bool HasDPP8 = false;
132 bool HasDPALU_DPP = false;
133 bool HasDPPSrc1SGPR = false;
134 bool HasPackedFP32Ops = false;
135 bool HasImageInsts = false;
137 bool HasR128A16 = false;
138 bool HasA16 = false;
139 bool HasG16 = false;
140 bool HasNSAEncoding = false;
142 bool GFX10_AEncoding = false;
143 bool GFX10_BEncoding = false;
144 bool HasDLInsts = false;
145 bool HasFmacF64Inst = false;
146 bool HasDot1Insts = false;
147 bool HasDot2Insts = false;
148 bool HasDot3Insts = false;
149 bool HasDot4Insts = false;
150 bool HasDot5Insts = false;
151 bool HasDot6Insts = false;
152 bool HasDot7Insts = false;
153 bool HasDot8Insts = false;
154 bool HasDot9Insts = false;
155 bool HasDot10Insts = false;
156 bool HasDot11Insts = false;
157 bool HasMAIInsts = false;
158 bool HasFP8Insts = false;
160 bool HasPkFmacF16Inst = false;
172 /// The maximum number of instructions that may be placed within an S_CLAUSE,
173 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
174 /// indicates a lack of S_CLAUSE support.
176 bool SupportsSRAMECC = false;
177
178 // This should not be used directly. 'TargetID' tracks the dynamic settings
179 // for SRAMECC.
180 bool EnableSRAMECC = false;
181
182 bool HasNoSdstCMPX = false;
183 bool HasVscnt = false;
184 bool HasGetWaveIdInst = false;
185 bool HasSMemTimeInst = false;
188 bool HasVOP3Literal = false;
189 bool HasNoDataDepHazard = false;
190 bool FlatAddressSpace = false;
191 bool FlatInstOffsets = false;
192 bool FlatGlobalInsts = false;
193 bool FlatScratchInsts = false;
196 bool EnableFlatScratch = false;
198 bool HasGDS = false;
199 bool HasGWS = false;
200 bool AddNoCarryInsts = false;
201 bool HasUnpackedD16VMem = false;
202 bool LDSMisalignedBug = false;
205 bool UnalignedDSAccess = false;
206 bool HasPackedTID = false;
207 bool ScalarizeGlobal = false;
208 bool HasSALUFloatInsts = false;
212
219 bool HasNSAtoVMEMBug = false;
220 bool HasNSAClauseBug = false;
221 bool HasOffset3fBug = false;
226 bool HasGFX11FullVGPRs = false;
227 bool HasMADIntraFwdBug = false;
228 bool HasVOPDInsts = false;
231
232 bool RequiresCOV6 = false;
233
234 // Dummy feature to use for assembler in tablegen.
235 bool FeatureDisable = false;
236
238private:
239 SIInstrInfo InstrInfo;
240 SITargetLowering TLInfo;
241 SIFrameLowering FrameLowering;
242
243public:
244 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
245 const GCNTargetMachine &TM);
246 ~GCNSubtarget() override;
247
249 StringRef GPU, StringRef FS);
250
251 const SIInstrInfo *getInstrInfo() const override {
252 return &InstrInfo;
253 }
254
255 const SIFrameLowering *getFrameLowering() const override {
256 return &FrameLowering;
257 }
258
259 const SITargetLowering *getTargetLowering() const override {
260 return &TLInfo;
261 }
262
263 const SIRegisterInfo *getRegisterInfo() const override {
264 return &InstrInfo.getRegisterInfo();
265 }
266
267 const CallLowering *getCallLowering() const override {
268 return CallLoweringInfo.get();
269 }
270
271 const InlineAsmLowering *getInlineAsmLowering() const override {
272 return InlineAsmLoweringInfo.get();
273 }
274
276 return InstSelector.get();
277 }
278
279 const LegalizerInfo *getLegalizerInfo() const override {
280 return Legalizer.get();
281 }
282
283 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
284 return RegBankInfo.get();
285 }
286
288 return TargetID;
289 }
290
291 // Nothing implemented, just prevent crashes on use.
293 return &TSInfo;
294 }
295
297 return &InstrItins;
298 }
299
301
303 return (Generation)Gen;
304 }
305
306 unsigned getMaxWaveScratchSize() const {
307 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
308 if (getGeneration() >= GFX12) {
309 // 18-bit field in units of 64-dword.
310 return (64 * 4) * ((1 << 18) - 1);
311 }
312 if (getGeneration() == GFX11) {
313 // 15-bit field in units of 64-dword.
314 return (64 * 4) * ((1 << 15) - 1);
315 }
316 // 13-bit field in units of 256-dword.
317 return (256 * 4) * ((1 << 13) - 1);
318 }
319
320 /// Return the number of high bits known to be zero for a frame index.
323 }
324
325 int getLDSBankCount() const {
326 return LDSBankCount;
327 }
328
329 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
330 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
331 }
332
333 unsigned getConstantBusLimit(unsigned Opcode) const;
334
335 /// Returns if the result of this instruction with a 16-bit result returned in
336 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
337 /// the original value.
338 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
339
340 bool supportsWGP() const { return getGeneration() >= GFX10; }
341
342 bool hasIntClamp() const {
343 return HasIntClamp;
344 }
345
346 bool hasFP64() const {
347 return FP64;
348 }
349
350 bool hasMIMG_R128() const {
351 return MIMG_R128;
352 }
353
354 bool hasHWFP64() const {
355 return FP64;
356 }
357
358 bool hasHalfRate64Ops() const {
359 return HalfRate64Ops;
360 }
361
362 bool hasFullRate64Ops() const {
363 return FullRate64Ops;
364 }
365
366 bool hasAddr64() const {
368 }
369
370 bool hasFlat() const {
372 }
373
374 // Return true if the target only has the reverse operand versions of VALU
375 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
376 bool hasOnlyRevVALUShifts() const {
378 }
379
380 bool hasFractBug() const {
382 }
383
384 bool hasBFE() const {
385 return true;
386 }
387
388 bool hasBFI() const {
389 return true;
390 }
391
392 bool hasBFM() const {
393 return hasBFE();
394 }
395
396 bool hasBCNT(unsigned Size) const {
397 return true;
398 }
399
400 bool hasFFBL() const {
401 return true;
402 }
403
404 bool hasFFBH() const {
405 return true;
406 }
407
408 bool hasMed3_16() const {
410 }
411
412 bool hasMin3Max3_16() const {
414 }
415
416 bool hasFmaMixInsts() const {
417 return HasFmaMixInsts;
418 }
419
420 bool hasCARRY() const {
421 return true;
422 }
423
424 bool hasFMA() const {
425 return FMA;
426 }
427
428 bool hasSwap() const {
429 return GFX9Insts;
430 }
431
432 bool hasScalarPackInsts() const {
433 return GFX9Insts;
434 }
435
436 bool hasScalarMulHiInsts() const {
437 return GFX9Insts;
438 }
439
440 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
441
444 }
445
447 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
448 return getGeneration() >= GFX9;
449 }
450
451 /// True if the offset field of DS instructions works as expected. On SI, the
452 /// offset uses a 16-bit adder and does not always wrap properly.
453 bool hasUsableDSOffset() const {
454 return getGeneration() >= SEA_ISLANDS;
455 }
456
459 }
460
461 /// Condition output from div_scale is usable.
464 }
465
466 /// Extra wait hazard is needed in some cases before
467 /// s_cbranch_vccnz/s_cbranch_vccz.
468 bool hasReadVCCZBug() const {
469 return getGeneration() <= SEA_ISLANDS;
470 }
471
472 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
474 return getGeneration() >= GFX10;
475 }
476
477 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
478 /// was written by a VALU instruction.
481 }
482
483 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
484 /// SGPR was written by a VALU Instruction.
487 }
488
489 bool hasRFEHazards() const {
491 }
492
493 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
494 unsigned getSetRegWaitStates() const {
495 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
496 }
497
498 bool dumpCode() const {
499 return DumpCode;
500 }
501
502 /// Return the amount of LDS that can be used that will not restrict the
503 /// occupancy lower than WaveCount.
504 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
505 const Function &) const;
506
509 }
510
511 /// \returns If target supports S_DENORM_MODE.
512 bool hasDenormModeInst() const {
514 }
515
516 bool useFlatForGlobal() const {
517 return FlatForGlobal;
518 }
519
520 /// \returns If target supports ds_read/write_b128 and user enables generation
521 /// of ds_read/write_b128.
522 bool useDS128() const {
523 return CIInsts && EnableDS128;
524 }
525
526 /// \return If target supports ds_read/write_b96/128.
527 bool hasDS96AndDS128() const {
528 return CIInsts;
529 }
530
531 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
532 bool haveRoundOpsF64() const {
533 return CIInsts;
534 }
535
536 /// \returns If MUBUF instructions always perform range checking, even for
537 /// buffer resources used for private memory access.
540 }
541
542 /// \returns If target requires PRT Struct NULL support (zero result registers
543 /// for sparse texture support).
544 bool usePRTStrictNull() const {
545 return EnablePRTStrictNull;
546 }
547
550 }
551
552 /// \returns true if the target supports backing off of s_barrier instructions
553 /// when an exception is raised.
555 return BackOffBarrier;
556 }
557
560 }
561
564 }
565
566 bool hasUnalignedDSAccess() const {
567 return UnalignedDSAccess;
568 }
569
572 }
573
576 }
577
579 return UnalignedAccessMode;
580 }
581
582 bool hasApertureRegs() const {
583 return HasApertureRegs;
584 }
585
586 bool isTrapHandlerEnabled() const {
587 return TrapHandler;
588 }
589
590 bool isXNACKEnabled() const {
591 return TargetID.isXnackOnOrAny();
592 }
593
594 bool isTgSplitEnabled() const {
595 return EnableTgSplit;
596 }
597
598 bool isCuModeEnabled() const {
599 return EnableCuMode;
600 }
601
602 bool hasFlatAddressSpace() const {
603 return FlatAddressSpace;
604 }
605
606 bool hasFlatScrRegister() const {
607 return hasFlatAddressSpace();
608 }
609
610 bool hasFlatInstOffsets() const {
611 return FlatInstOffsets;
612 }
613
614 bool hasFlatGlobalInsts() const {
615 return FlatGlobalInsts;
616 }
617
618 bool hasFlatScratchInsts() const {
619 return FlatScratchInsts;
620 }
621
622 // Check if target supports ST addressing mode with FLAT scratch instructions.
623 // The ST addressing mode means no registers are used, either VGPR or SGPR,
624 // but only immediate offset is swizzled and added to the FLAT scratch base.
625 bool hasFlatScratchSTMode() const {
627 }
628
629 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
630
633 }
634
635 bool enableFlatScratch() const {
636 return flatScratchIsArchitected() ||
638 }
639
640 bool hasGlobalAddTidInsts() const {
641 return GFX10_BEncoding;
642 }
643
644 bool hasAtomicCSub() const {
645 return GFX10_BEncoding;
646 }
647
648 // BUFFER/FLAT/GLOBAL_ATOMIC_ADD/MIN/MAX_F64
650
651 bool hasExportInsts() const {
652 return !hasGFX940Insts();
653 }
654
655 bool hasVINTERPEncoding() const {
656 return GFX11Insts;
657 }
658
659 // DS_ADD_F64/DS_ADD_RTN_F64
660 bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); }
661
663 return getGeneration() >= GFX9;
664 }
665
668 }
669
671 return getGeneration() > GFX9;
672 }
673
674 bool hasD16LoadStore() const {
675 return getGeneration() >= GFX9;
676 }
677
680 }
681
682 bool hasD16Images() const {
684 }
685
686 /// Return if most LDS instructions have an m0 use that require m0 to be
687 /// initialized.
688 bool ldsRequiresM0Init() const {
689 return getGeneration() < GFX9;
690 }
691
692 // True if the hardware rewinds and replays GWS operations if a wave is
693 // preempted.
694 //
695 // If this is false, a GWS operation requires testing if a nack set the
696 // MEM_VIOL bit, and repeating if so.
697 bool hasGWSAutoReplay() const {
698 return getGeneration() >= GFX9;
699 }
700
701 /// \returns if target has ds_gws_sema_release_all instruction.
702 bool hasGWSSemaReleaseAll() const {
703 return CIInsts;
704 }
705
706 /// \returns true if the target has integer add/sub instructions that do not
707 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
708 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
709 /// for saturation.
710 bool hasAddNoCarry() const {
711 return AddNoCarryInsts;
712 }
713
714 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
715
716 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
717
718 bool hasUnpackedD16VMem() const {
719 return HasUnpackedD16VMem;
720 }
721
722 // Covers VS/PS/CS graphics shaders
723 bool isMesaGfxShader(const Function &F) const {
724 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
725 }
726
727 bool hasMad64_32() const {
728 return getGeneration() >= SEA_ISLANDS;
729 }
730
731 bool hasSDWAOmod() const {
732 return HasSDWAOmod;
733 }
734
735 bool hasSDWAScalar() const {
736 return HasSDWAScalar;
737 }
738
739 bool hasSDWASdst() const {
740 return HasSDWASdst;
741 }
742
743 bool hasSDWAMac() const {
744 return HasSDWAMac;
745 }
746
747 bool hasSDWAOutModsVOPC() const {
748 return HasSDWAOutModsVOPC;
749 }
750
751 bool hasDLInsts() const {
752 return HasDLInsts;
753 }
754
755 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
756
757 bool hasDot1Insts() const {
758 return HasDot1Insts;
759 }
760
761 bool hasDot2Insts() const {
762 return HasDot2Insts;
763 }
764
765 bool hasDot3Insts() const {
766 return HasDot3Insts;
767 }
768
769 bool hasDot4Insts() const {
770 return HasDot4Insts;
771 }
772
773 bool hasDot5Insts() const {
774 return HasDot5Insts;
775 }
776
777 bool hasDot6Insts() const {
778 return HasDot6Insts;
779 }
780
781 bool hasDot7Insts() const {
782 return HasDot7Insts;
783 }
784
785 bool hasDot8Insts() const {
786 return HasDot8Insts;
787 }
788
789 bool hasDot9Insts() const {
790 return HasDot9Insts;
791 }
792
793 bool hasDot10Insts() const {
794 return HasDot10Insts;
795 }
796
797 bool hasDot11Insts() const {
798 return HasDot11Insts;
799 }
800
801 bool hasMAIInsts() const {
802 return HasMAIInsts;
803 }
804
805 bool hasFP8Insts() const {
806 return HasFP8Insts;
807 }
808
810
811 bool hasPkFmacF16Inst() const {
812 return HasPkFmacF16Inst;
813 }
814
816
818
819 bool hasAtomicFaddInsts() const {
821 }
822
824
826
829 }
830
833 }
834
837 }
838
840
842
845 }
846
847 bool hasNoSdstCMPX() const {
848 return HasNoSdstCMPX;
849 }
850
851 bool hasVscnt() const {
852 return HasVscnt;
853 }
854
855 bool hasGetWaveIdInst() const {
856 return HasGetWaveIdInst;
857 }
858
859 bool hasSMemTimeInst() const {
860 return HasSMemTimeInst;
861 }
862
865 }
866
869 }
870
871 bool hasVOP3Literal() const {
872 return HasVOP3Literal;
873 }
874
875 bool hasNoDataDepHazard() const {
876 return HasNoDataDepHazard;
877 }
878
880 return getGeneration() < SEA_ISLANDS;
881 }
882
883 bool hasInstPrefetch() const {
884 return getGeneration() == GFX10 || getGeneration() == GFX11;
885 }
886
887 bool hasPrefetch() const { return GFX12Insts; }
888
889 // Has s_cmpk_* instructions.
890 bool hasSCmpK() const { return getGeneration() < GFX12; }
891
892 // Scratch is allocated in 256 dword per wave blocks for the entire
893 // wavefront. When viewed from the perspective of an arbitrary workitem, this
894 // is 4-byte aligned.
895 //
896 // Only 4-byte alignment is really needed to access anything. Transformations
897 // on the pointer value itself may rely on the alignment / known low bits of
898 // the pointer. Set this to something above the minimum to avoid needing
899 // dynamic realignment in common cases.
900 Align getStackAlignment() const { return Align(16); }
901
902 bool enableMachineScheduler() const override {
903 return true;
904 }
905
906 bool useAA() const override;
907
908 bool enableSubRegLiveness() const override {
909 return true;
910 }
911
914
915 // static wrappers
916 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
917
918 // XXX - Why is this here if it isn't in the default pass set?
919 bool enableEarlyIfConversion() const override {
920 return true;
921 }
922
924 unsigned NumRegionInstrs) const override;
925
926 unsigned getMaxNumUserSGPRs() const {
927 return AMDGPU::getMaxNumUserSGPRs(*this);
928 }
929
930 bool hasSMemRealTime() const {
931 return HasSMemRealTime;
932 }
933
934 bool hasMovrel() const {
935 return HasMovrel;
936 }
937
938 bool hasVGPRIndexMode() const {
939 return HasVGPRIndexMode;
940 }
941
942 bool useVGPRIndexMode() const;
943
944 bool hasScalarCompareEq64() const {
946 }
947
949
950 bool hasScalarStores() const {
951 return HasScalarStores;
952 }
953
954 bool hasScalarAtomics() const {
955 return HasScalarAtomics;
956 }
957
958 bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
959
960 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
961 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
962
963 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
964 bool hasPermLane64() const { return getGeneration() >= GFX11; }
965
966 bool hasDPP() const {
967 return HasDPP;
968 }
969
970 bool hasDPPBroadcasts() const {
971 return HasDPP && getGeneration() < GFX10;
972 }
973
975 return HasDPP && getGeneration() < GFX10;
976 }
977
978 bool hasDPP8() const {
979 return HasDPP8;
980 }
981
982 bool hasDPALU_DPP() const {
983 return HasDPALU_DPP;
984 }
985
986 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
987
988 bool hasPackedFP32Ops() const {
989 return HasPackedFP32Ops;
990 }
991
992 // Has V_PK_MOV_B32 opcode
993 bool hasPkMovB32() const {
994 return GFX90AInsts;
995 }
996
998 return getGeneration() >= GFX10 || hasGFX940Insts();
999 }
1000
1001 bool hasImageInsts() const {
1002 return HasImageInsts;
1003 }
1004
1006 return HasExtendedImageInsts;
1007 }
1008
1009 bool hasR128A16() const {
1010 return HasR128A16;
1011 }
1012
1013 bool hasA16() const { return HasA16; }
1014
1015 bool hasG16() const { return HasG16; }
1016
1017 bool hasOffset3fBug() const {
1018 return HasOffset3fBug;
1019 }
1020
1022
1024
1025 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1026
1028
1029 bool hasNSAEncoding() const { return HasNSAEncoding; }
1030
1031 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1032
1034
1035 unsigned getNSAMaxSize(bool HasSampler = false) const {
1036 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1037 }
1038
1039 bool hasGFX10_AEncoding() const {
1040 return GFX10_AEncoding;
1041 }
1042
1043 bool hasGFX10_BEncoding() const {
1044 return GFX10_BEncoding;
1045 }
1046
1047 bool hasGFX10_3Insts() const {
1048 return GFX10_3Insts;
1049 }
1050
1051 bool hasMadF16() const;
1052
1053 bool hasMovB64() const { return GFX940Insts; }
1054
1055 bool hasLshlAddB64() const { return GFX940Insts; }
1056
1057 bool enableSIScheduler() const {
1058 return EnableSIScheduler;
1059 }
1060
1061 bool loadStoreOptEnabled() const {
1062 return EnableLoadStoreOpt;
1063 }
1064
1065 bool hasSGPRInitBug() const {
1066 return SGPRInitBug;
1067 }
1068
1070 return UserSGPRInit16Bug && isWave32();
1071 }
1072
1074
1077 }
1078
1081 }
1082
1085 }
1086
1087 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1089 return CIInsts;
1090 }
1091
1094 }
1095
1099 }
1100
1103 }
1104
1107 }
1108
1111 }
1112
1115 }
1116
1119 }
1120
1121 bool hasLDSMisalignedBug() const {
1122 return LDSMisalignedBug && !EnableCuMode;
1123 }
1124
1126 return HasInstFwdPrefetchBug;
1127 }
1128
1130 return HasVcmpxExecWARHazard;
1131 }
1132
1135 }
1136
1137 // Shift amount of a 64 bit shift cannot be a highest allocated register
1138 // if also at the end of the allocation block.
1140 return GFX90AInsts && !GFX940Insts;
1141 }
1142
1143 // Has one cycle hazard on transcendental instruction feeding a
1144 // non transcendental VALU.
1145 bool hasTransForwardingHazard() const { return GFX940Insts; }
1146
1147 // Has one cycle hazard on a VALU instruction partially writing dst with
1148 // a shift of result bits feeding another VALU instruction.
1150
1151 // Cannot use op_sel with v_dot instructions.
1152 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1153
1154 // Does not have HW interlocs for VALU writing and then reading SGPRs.
1155 bool hasVDecCoExecHazard() const {
1156 return GFX940Insts;
1157 }
1158
1159 bool hasNSAtoVMEMBug() const {
1160 return HasNSAtoVMEMBug;
1161 }
1162
1163 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1164
1165 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1166
1167 bool hasGFX90AInsts() const { return GFX90AInsts; }
1168
1170 return getGeneration() == GFX10;
1171 }
1172
1173 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1174
1175 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1176
1177 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1178
1180 return getGeneration() == GFX11;
1181 }
1182
1184
1186
1187 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1188
1189 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1190
1191 /// Return if operations acting on VGPR tuples require even alignment.
1192 bool needsAlignedVGPRs() const { return GFX90AInsts; }
1193
1194 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1195 bool hasSPackHL() const { return GFX11Insts; }
1196
1197 /// Return true if the target's EXP instruction has the COMPR flag, which
1198 /// affects the meaning of the EN (enable) bits.
1199 bool hasCompressedExport() const { return !GFX11Insts; }
1200
1201 /// Return true if the target's EXP instruction supports the NULL export
1202 /// target.
1203 bool hasNullExportTarget() const { return !GFX11Insts; }
1204
1205 bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; }
1206
1207 bool hasVOPDInsts() const { return HasVOPDInsts; }
1208
1210
1211 /// Return true if the target has the S_DELAY_ALU instruction.
1212 bool hasDelayAlu() const { return GFX11Insts; }
1213
1214 bool hasPackedTID() const { return HasPackedTID; }
1215
1216 // GFX940 is a derivative of GFX90A. hasGFX940Insts() being true implies that
1217 // hasGFX90AInsts() is also true.
1218 bool hasGFX940Insts() const { return GFX940Insts; }
1219
1220 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1221
1223
1225
1227
1228 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1229 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1230 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1231
1232 /// \returns The maximum number of instructions that can be enclosed in an
1233 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1234 /// instruction.
1235 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1236
1237 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1238 /// SGPRs
1239 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1240
1241 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1242 /// VGPRs
1243 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1244
1245 /// Return occupancy for the given function. Used LDS and a number of
1246 /// registers if provided.
1247 /// Note, occupancy can be affected by the scratch allocation as well, but
1248 /// we do not have enough information to compute it.
1249 unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1250 unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1251
1252 /// \returns true if the flat_scratch register should be initialized with the
1253 /// pointer to the wave's scratch memory rather than a size and offset.
1256 }
1257
1258 /// \returns true if the flat_scratch register is initialized by the HW.
1259 /// In this case it is readonly.
1261
1262 /// \returns true if the architected SGPRs are enabled.
1264
1265 /// \returns true if Global Data Share is supported.
1266 bool hasGDS() const { return HasGDS; }
1267
1268 /// \returns true if Global Wave Sync is supported.
1269 bool hasGWS() const { return HasGWS; }
1270
1271 /// \returns true if the machine has merged shaders in which s0-s7 are
1272 /// reserved by the hardware and user SGPRs start at s8
1273 bool hasMergedShaders() const {
1274 return getGeneration() >= GFX9;
1275 }
1276
1277 // \returns true if the target supports the pre-NGG legacy geometry path.
1278 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1279
1280 // \returns true if preloading kernel arguments is supported.
1281 bool hasKernargPreload() const { return KernargPreload; }
1282
1283 // \returns true if the target has split barriers feature
1284 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1285
1286 // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1287 bool hasCvtFP8VOP1Bug() const { return true; }
1288
1289 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1290 // no-return form.
1292
1293 // \returns true if the target has DX10_CLAMP kernel descriptor mode bit
1294 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1295
1296 // \returns true if the target has IEEE kernel descriptor mode bit
1297 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1298
1299 // \returns true if the target has IEEE fminimum/fmaximum instructions
1300 bool hasIEEEMinMax() const { return getGeneration() >= GFX12; }
1301
1302 // \returns true if the target has WG_RR_MODE kernel descriptor mode bit
1303 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1304
1305 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1306 /// values.
1307 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1308
1309 // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1310 // of sign-extending.
1311 bool hasGetPCZeroExtension() const { return GFX12Insts; }
1312
1313 /// \returns SGPR allocation granularity supported by the subtarget.
1314 unsigned getSGPRAllocGranule() const {
1316 }
1317
1318 /// \returns SGPR encoding granularity supported by the subtarget.
1319 unsigned getSGPREncodingGranule() const {
1321 }
1322
1323 /// \returns Total number of SGPRs supported by the subtarget.
1324 unsigned getTotalNumSGPRs() const {
1326 }
1327
1328 /// \returns Addressable number of SGPRs supported by the subtarget.
1329 unsigned getAddressableNumSGPRs() const {
1331 }
1332
1333 /// \returns Minimum number of SGPRs that meets the given number of waves per
1334 /// execution unit requirement supported by the subtarget.
1335 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1336 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1337 }
1338
1339 /// \returns Maximum number of SGPRs that meets the given number of waves per
1340 /// execution unit requirement supported by the subtarget.
1341 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1342 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1343 }
1344
1345 /// \returns Reserved number of SGPRs. This is common
1346 /// utility function called by MachineFunction and
1347 /// Function variants of getReservedNumSGPRs.
1348 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1349 /// \returns Reserved number of SGPRs for given machine function \p MF.
1350 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1351
1352 /// \returns Reserved number of SGPRs for given function \p F.
1353 unsigned getReservedNumSGPRs(const Function &F) const;
1354
1355 /// \returns max num SGPRs. This is the common utility
1356 /// function called by MachineFunction and Function
1357 /// variants of getMaxNumSGPRs.
1358 unsigned getBaseMaxNumSGPRs(const Function &F,
1359 std::pair<unsigned, unsigned> WavesPerEU,
1360 unsigned PreloadedSGPRs,
1361 unsigned ReservedNumSGPRs) const;
1362
1363 /// \returns Maximum number of SGPRs that meets number of waves per execution
1364 /// unit requirement for function \p MF, or number of SGPRs explicitly
1365 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1366 ///
1367 /// \returns Value that meets number of waves per execution unit requirement
1368 /// if explicitly requested value cannot be converted to integer, violates
1369 /// subtarget's specifications, or does not meet number of waves per execution
1370 /// unit requirement.
1371 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1372
1373 /// \returns Maximum number of SGPRs that meets number of waves per execution
1374 /// unit requirement for function \p F, or number of SGPRs explicitly
1375 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1376 ///
1377 /// \returns Value that meets number of waves per execution unit requirement
1378 /// if explicitly requested value cannot be converted to integer, violates
1379 /// subtarget's specifications, or does not meet number of waves per execution
1380 /// unit requirement.
1381 unsigned getMaxNumSGPRs(const Function &F) const;
1382
1383 /// \returns VGPR allocation granularity supported by the subtarget.
1384 unsigned getVGPRAllocGranule() const {
1386 }
1387
1388 /// \returns VGPR encoding granularity supported by the subtarget.
1389 unsigned getVGPREncodingGranule() const {
1391 }
1392
1393 /// \returns Total number of VGPRs supported by the subtarget.
1394 unsigned getTotalNumVGPRs() const {
1396 }
1397
1398 /// \returns Addressable number of architectural VGPRs supported by the
1399 /// subtarget.
1402 }
1403
1404 /// \returns Addressable number of VGPRs supported by the subtarget.
1405 unsigned getAddressableNumVGPRs() const {
1407 }
1408
1409 /// \returns the minimum number of VGPRs that will prevent achieving more than
1410 /// the specified number of waves \p WavesPerEU.
1411 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
// Delegates to the AMDGPU::IsaInfo query of the same name for this subtarget.
1412 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1413 }
1414
1415 /// \returns the maximum number of VGPRs that can be used and still achieve
1416 /// at least the specified number of waves \p WavesPerEU.
1417 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1418 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1419 }
1420
1421 /// \returns Maximum number of VGPRs. This is the common utility function
1422 /// called by the MachineFunction and Function variants of getMaxNumVGPRs.
1423 unsigned getBaseMaxNumVGPRs(const Function &F,
1424 std::pair<unsigned, unsigned> WavesPerEU) const;
1425 /// \returns Maximum number of VGPRs that meets number of waves per execution
1426 /// unit requirement for function \p F, or number of VGPRs explicitly
1427 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1428 ///
1429 /// Falls back to the value that meets number of waves per execution unit
1430 /// requirement if explicitly requested value cannot be converted to integer,
1431 /// violates subtarget's specifications, or does not meet number of waves per
1432 /// execution unit requirement.
1433 unsigned getMaxNumVGPRs(const Function &F) const;
1434
/// \returns Maximum number of AGPRs for function \p F. This forwards to
/// getMaxNumVGPRs(F), so the reported AGPR limit equals the VGPR limit.
1435 unsigned getMaxNumAGPRs(const Function &F) const {
1436 return getMaxNumVGPRs(F);
1437 }
1438
1439 /// \returns Maximum number of VGPRs that meets number of waves per execution
1440 /// unit requirement for machine function \p MF, or number of VGPRs explicitly
1441 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1442 ///
1443 /// Falls back to the value that meets number of waves per execution unit
1444 /// requirement if explicitly requested value cannot be converted to integer,
1445 /// violates subtarget's specifications, or does not meet number of waves per
1446 /// execution unit requirement.
1447 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1448
/// Override of the base-class hook that receives the list of post-RA
/// ScheduleDAGMutation objects in \p Mutations.
/// NOTE(review): presumably appends this subtarget's mutations to the list;
/// the definition is out of line, confirm there.
1449 void getPostRAMutations(
1450 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1451 const override;
1452
1453 std::unique_ptr<ScheduleDAGMutation>
1455
/// \returns true if getWavefrontSize() reports a wavefront size of 32.
1456 bool isWave32() const {
1457 return getWavefrontSize() == 32;
1458 }
1459
/// \returns true if getWavefrontSize() reports a wavefront size of 64.
1460 bool isWave64() const {
1461 return getWavefrontSize() == 64;
1462 }
1463
1465 return getRegisterInfo()->getBoolRC();
1466 }
1467
1468 /// \returns Maximum number of work groups per compute unit supported by the
1469 /// subtarget and limited by given \p FlatWorkGroupSize.
1470 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
// Delegates to the AMDGPU::IsaInfo query of the same name for this subtarget.
1471 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1472 }
1473
1474 /// \returns Minimum flat work group size supported by the subtarget.
1475 unsigned getMinFlatWorkGroupSize() const override {
1477 }
1478
1479 /// \returns Maximum flat work group size supported by the subtarget.
1480 unsigned getMaxFlatWorkGroupSize() const override {
1482 }
1483
1484 /// \returns Number of waves per execution unit required to support the given
1485 /// \p FlatWorkGroupSize.
1486 unsigned
1487 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
// Delegates to the AMDGPU::IsaInfo query of the same name for this subtarget.
1488 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1489 }
1490
1491 /// \returns Minimum number of waves per execution unit supported by the
1492 /// subtarget.
1493 unsigned getMinWavesPerEU() const override {
1495 }
1496
/// Override of the base-class scheduling hook for the dependency \p Dep
/// between operand \p DefOpIdx of \p Def and operand \p UseOpIdx of \p Use.
/// NOTE(review): presumably adjusts \p Dep in place (it is the only non-const
/// reference parameter); the definition is out of line, confirm there.
1497 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1498 SDep &Dep) const override;
1499
1500 /// \returns true if it's beneficial on this subtarget for the scheduler to
1501 /// cluster stores as well as loads.
/// This is currently the case whenever getGeneration() >= GFX11.
1502 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1503
1504 /// \returns the number of address arguments from which to enable MIMG NSA
1505 /// on supported architectures.
1506 unsigned getNSAThreshold(const MachineFunction &MF) const;
1507
1508 /// \returns true if the subtarget has a hazard requiring an "s_nop 0"
1509 /// instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1511 // Currently all targets that support the dealloc VGPRs message also require
1512 // the nop.
1513 return true;
1514 }
1515};
1516
1518public:
// Read-only accessors: each one returns the private member of the matching
// name (a usage flag for the has*() queries, a counter for the getNum*()
// queries) without any further computation.
1519 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1520
1521 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1522
1523 bool hasDispatchPtr() const { return DispatchPtr; }
1524
1525 bool hasQueuePtr() const { return QueuePtr; }
1526
1527 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1528
1529 bool hasDispatchID() const { return DispatchID; }
1530
1531 bool hasFlatScratchInit() const { return FlatScratchInit; }
1532
1533 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1534
1535 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1536
// NOTE(review): presumably returns how many user SGPRs are still unallocated;
// the definition is out of line. Unlike the neighbouring queries this member
// is not const-qualified — confirm whether that is intentional.
1537 unsigned getNumFreeUserSGPRs();
1538
// NOTE(review): presumably records \p NumSGPRs additional user SGPRs as used
// for kernarg preloading (see getNumKernargPreloadSGPRs() and
// getNumUsedUserSGPRs()); the definition is out of line, confirm there.
1539 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1540
1541 enum UserSGPRID : unsigned {
1551
1552 // Returns the size in number of SGPRs for preload user SGPR field.
1554 switch (ID) {
1556 return 2;
1558 return 4;
1559 case DispatchPtrID:
1560 return 2;
1561 case QueuePtrID:
1562 return 2;
1564 return 2;
1565 case DispatchIdID:
1566 return 2;
1567 case FlatScratchInitID:
1568 return 2;
1570 return 1;
1571 }
1572 llvm_unreachable("Unknown UserSGPRID.");
1573 }
1574
// Constructs the user-SGPR usage info for function \p F on subtarget \p ST.
// NOTE(review): which usage flags end up set is decided in the out-of-line
// definition, which is not visible in this header.
1575 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1576
1577private:
1578 const GCNSubtarget &ST;
1579
1580 // Private memory buffer
1581 // Compute directly in sgpr[0:1]
1582 // Other shaders indirect 64-bits at sgpr[0:1]
1583 bool ImplicitBufferPtr = false;
1584
1585 bool PrivateSegmentBuffer = false;
1586
1587 bool DispatchPtr = false;
1588
1589 bool QueuePtr = false;
1590
1591 bool KernargSegmentPtr = false;
1592
1593 bool DispatchID = false;
1594
1595 bool FlatScratchInit = false;
1596
1597 unsigned NumKernargPreloadSGPRs = 0;
1598
1599 unsigned NumUsedUserSGPRs = 0;
1600};
1601
1602} // end namespace llvm
1603
1604#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
const char LLVMTargetMachineRef TM
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
Definition: GCNSubtarget.h:887
bool hasFlat() const
Definition: GCNSubtarget.h:370
bool hasD16Images() const
Definition: GCNSubtarget.h:682
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:64
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:815
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:731
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:218
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:473
bool hasSwap() const
Definition: GCNSubtarget.h:428
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:811
bool hasDot2Insts() const
Definition: GCNSubtarget.h:761
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:674
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:735
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:554
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:944
int getLDSBankCount() const
Definition: GCNSubtarget.h:325
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:376
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:462
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:453
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:908
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:974
unsigned getSGPRAllocGranule() const
bool hasLdsAtomicAddF64() const
Definition: GCNSubtarget.h:660
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:670
bool flatScratchIsPointer() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
bool hasSDWAMac() const
Definition: GCNSubtarget.h:743
bool hasFP8ConversionInsts() const
Definition: GCNSubtarget.h:809
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:781
bool hasApertureRegs() const
Definition: GCNSubtarget.h:582
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:66
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:457
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:610
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:879
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
Definition: GCNSubtarget.h:841
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:855
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:912
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
bool hasDLInsts() const
Definition: GCNSubtarget.h:751
bool hasExtendedImageInsts() const
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:396
bool hasMAIInsts() const
Definition: GCNSubtarget.h:801
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:618
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:662
bool hasArchitectedSGPRs() const
bool hasHWFP64() const
Definition: GCNSubtarget.h:354
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:512
bool hasMFMAInlineLiteralBug() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:859
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:570
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:251
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:75
bool hasDot1Insts() const
Definition: GCNSubtarget.h:757
bool hasDot3Insts() const
Definition: GCNSubtarget.h:765
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:271
bool HasVGPRSingleUseHintInsts
Definition: GCNSubtarget.h:209
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:548
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:823
unsigned getTotalNumSGPRs() const
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:296
bool HasShaderCyclesHiLoRegisters
Definition: GCNSubtarget.h:187
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
Definition: GCNSubtarget.h:993
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
Definition: GCNSubtarget.h:900
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:538
bool hasScalarSubwordLoads() const
Definition: GCNSubtarget.h:440
bool hasDot11Insts() const
Definition: GCNSubtarget.h:797
bool enableFlatScratch() const
Definition: GCNSubtarget.h:635
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:558
bool hasR128A16() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasSignedScratchOffsets() const
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:640
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:606
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
Definition: GCNSubtarget.h:964
bool requiresNopBeforeDeallocVGPRs() const
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:446
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:594
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:839
bool hasKernargPreload() const
bool hasFP8Insts() const
Definition: GCNSubtarget.h:805
unsigned getMaxNumAGPRs(const Function &F) const
unsigned getVGPRAllocGranule() const
bool hasReadM0MovRelInterpHazard() const
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:263
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:997
bool hasVscnt() const
Definition: GCNSubtarget.h:851
bool hasMad64_32() const
Definition: GCNSubtarget.h:727
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:275
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:116
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:522
bool hasExtendedWaitCounts() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:678
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:755
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:883
unsigned maxHardClauseLength() const
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:723
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
Definition: GCNSubtarget.h:651
bool hasDPP() const
Definition: GCNSubtarget.h:966
bool hasVINTERPEncoding() const
Definition: GCNSubtarget.h:655
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:283
bool hasLegacyGeometry() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:442
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:598
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:954
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:255
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:574
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:292
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:747
bool hasGFX11FullVGPRs() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:287
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:631
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasLDSFPAtomicAdd() const
Definition: GCNSubtarget.h:958
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:498
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:875
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:566
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:412
bool hasIntClamp() const
Definition: GCNSubtarget.h:342
bool hasGFX10_AEncoding() const
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:666
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:494
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:259
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:988
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:777
bool hasGFX940Insts() const
bool hasLshlAddB64() const
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:362
bool hasScalarStores() const
Definition: GCNSubtarget.h:950
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:586
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:902
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:162
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:614
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:913
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
Definition: GCNSubtarget.h:716
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:321
bool hasShaderCyclesHiLoRegisters() const
Definition: GCNSubtarget.h:867
bool hasSDWASdst() const
Definition: GCNSubtarget.h:739
bool HasDefaultComponentBroadcast
Definition: GCNSubtarget.h:171
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:432
bool hasFFBL() const
Definition: GCNSubtarget.h:400
bool hasNSAEncoding() const
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:930
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:602
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:970
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:544
bool hasMovB64() const
bool hasInstFwdPrefetchBug() const
bool hasMed3_16() const
Definition: GCNSubtarget.h:408
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasMovrel() const
Definition: GCNSubtarget.h:934
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:817
bool hasBFI() const
Definition: GCNSubtarget.h:388
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
Definition: GCNSubtarget.h:688
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:215
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:938
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:166
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:562
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:329
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool hasFMA() const
Definition: GCNSubtarget.h:424
bool hasDot10Insts() const
Definition: GCNSubtarget.h:793
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:507
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
Definition: GCNSubtarget.h:404
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:629
bool supportsWGP() const
Definition: GCNSubtarget.h:340
bool hasG16() const
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:358
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:819
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:165
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:195
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:827
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:350
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
Definition: GCNSubtarget.h:978
bool hasDot5Insts() const
Definition: GCNSubtarget.h:773
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:926
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:825
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
Definition: GCNSubtarget.h:175
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:961
bool hasFlatScratchSVSSwizzleBug() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
Definition: GCNSubtarget.h:948
bool hasVDecCoExecHazard() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:392
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:532
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
Definition: GCNSubtarget.h:785
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:436
bool hasSCmpK() const
Definition: GCNSubtarget.h:890
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:279
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:527
bool hasGWS() const
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:516
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:237
Generation getGeneration() const
Definition: GCNSubtarget.h:302
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasForceStoreSC0SC1() const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:871
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:831
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:847
bool hasBufferFlatGlobalAtomicsF64() const
Definition: GCNSubtarget.h:649
unsigned getAddressableNumVGPRs() const
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:590
bool hasScalarAddSub64() const
Definition: GCNSubtarget.h:714
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:718
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:919
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:479
bool hasRFEHazards() const
Definition: GCNSubtarget.h:489
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:485
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:625
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:702
bool hasDPALU_DPP() const
Definition: GCNSubtarget.h:982
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:835
bool hasAddr64() const
Definition: GCNSubtarget.h:366
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:214
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:168
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:578
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:468
bool isWave64() const
bool hasIEEEMinMax() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:416
bool hasCARRY() const
Definition: GCNSubtarget.h:420
bool hasPackedTID() const
bool hasFP64() const
Definition: GCNSubtarget.h:346
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:710
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:863
bool hasSALUFloatInsts() const
bool hasVGPRSingleUseHintInsts() const
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:93
bool hasFractBug() const
Definition: GCNSubtarget.h:380
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool hasDPPSrc1SGPR() const
Definition: GCNSubtarget.h:986
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:306
bool hasDot4Insts() const
Definition: GCNSubtarget.h:769
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
~GCNSubtarget() override
bool hasDot9Insts() const
Definition: GCNSubtarget.h:789
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:644
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:62
bool hasDefaultComponentBroadcast() const
Definition: GCNSubtarget.h:843
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:267
bool hasBFE() const
Definition: GCNSubtarget.h:384
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:697
static unsigned getNumUserSGPRForField(UserSGPRID ID)
bool hasKernargSegmentPtr() const
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
bool hasImplicitBufferPtr() const
unsigned getNumKernargPreloadSGPRs() const
unsigned getNumUsedUserSGPRs() const
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition: ScheduleDAG.h:49
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:222
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool isShader(CallingConv::ID cc)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.