LLVM 20.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
26
27#define GET_SUBTARGETINFO_HEADER
28#include "AMDGPUGenSubtargetInfo.inc"
29
30namespace llvm {
31
32class GCNTargetMachine;
33
35 public AMDGPUSubtarget {
36public:
38
39 // Following 2 enums are documented at:
40 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
41 enum class TrapHandlerAbi {
42 NONE = 0x00,
43 AMDHSA = 0x01,
44 };
45
46 enum class TrapID {
47 LLVMAMDHSATrap = 0x02,
49 };
50
51private:
52 /// GlobalISel related APIs.
53 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
54 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
55 std::unique_ptr<InstructionSelector> InstSelector;
56 std::unique_ptr<LegalizerInfo> Legalizer;
57 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
58
59protected:
60 // Basic subtarget description.
63 unsigned Gen = INVALID;
65 int LDSBankCount = 0;
67
68 // Possibly statically set by tablegen, but may want to be overridden.
69 bool FastDenormalF32 = false;
70 bool HalfRate64Ops = false;
71 bool FullRate64Ops = false;
72
73 // Dynamically set bits that enable features.
74 bool FlatForGlobal = false;
76 bool BackOffBarrier = false;
78 bool UnalignedAccessMode = false;
79 bool HasApertureRegs = false;
80 bool SupportsXNACK = false;
81 bool KernargPreload = false;
82
83 // This should not be used directly. 'TargetID' tracks the dynamic settings
84 // for XNACK.
85 bool EnableXNACK = false;
86
87 bool EnableTgSplit = false;
88 bool EnableCuMode = false;
89 bool TrapHandler = false;
90 bool EnablePreciseMemory = false;
91
92 // Used as options.
93 bool EnableLoadStoreOpt = false;
95 bool EnableSIScheduler = false;
96 bool EnableDS128 = false;
97 bool EnablePRTStrictNull = false;
98 bool DumpCode = false;
99
100 // Subtarget properties statically set by tablegen.
101 bool FP64 = false;
102 bool FMA = false;
103 bool MIMG_R128 = false;
104 bool CIInsts = false;
105 bool GFX8Insts = false;
106 bool GFX9Insts = false;
107 bool GFX90AInsts = false;
108 bool GFX940Insts = false;
109 bool GFX10Insts = false;
110 bool GFX11Insts = false;
111 bool GFX12Insts = false;
112 bool GFX10_3Insts = false;
113 bool GFX7GFX8GFX9Insts = false;
114 bool SGPRInitBug = false;
115 bool UserSGPRInit16Bug = false;
118 bool HasSMemRealTime = false;
119 bool HasIntClamp = false;
120 bool HasFmaMixInsts = false;
121 bool HasMovrel = false;
122 bool HasVGPRIndexMode = false;
124 bool HasScalarStores = false;
125 bool HasScalarAtomics = false;
126 bool HasSDWAOmod = false;
127 bool HasSDWAScalar = false;
128 bool HasSDWASdst = false;
129 bool HasSDWAMac = false;
130 bool HasSDWAOutModsVOPC = false;
131 bool HasDPP = false;
132 bool HasDPP8 = false;
133 bool HasDPALU_DPP = false;
134 bool HasDPPSrc1SGPR = false;
135 bool HasPackedFP32Ops = false;
136 bool HasImageInsts = false;
138 bool HasR128A16 = false;
139 bool HasA16 = false;
140 bool HasG16 = false;
141 bool HasNSAEncoding = false;
143 bool GFX10_AEncoding = false;
144 bool GFX10_BEncoding = false;
145 bool HasDLInsts = false;
146 bool HasFmacF64Inst = false;
147 bool HasDot1Insts = false;
148 bool HasDot2Insts = false;
149 bool HasDot3Insts = false;
150 bool HasDot4Insts = false;
151 bool HasDot5Insts = false;
152 bool HasDot6Insts = false;
153 bool HasDot7Insts = false;
154 bool HasDot8Insts = false;
155 bool HasDot9Insts = false;
156 bool HasDot10Insts = false;
157 bool HasDot11Insts = false;
158 bool HasMAIInsts = false;
159 bool HasFP8Insts = false;
161 bool HasPkFmacF16Inst = false;
181 /// The maximum number of instructions that may be placed within an S_CLAUSE,
182 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
183 /// indicates a lack of S_CLAUSE support.
185 bool SupportsSRAMECC = false;
186
187 // This should not be used directly. 'TargetID' tracks the dynamic settings
188 // for SRAMECC.
189 bool EnableSRAMECC = false;
190
191 bool HasNoSdstCMPX = false;
192 bool HasVscnt = false;
193 bool HasGetWaveIdInst = false;
194 bool HasSMemTimeInst = false;
197 bool HasVOP3Literal = false;
198 bool HasNoDataDepHazard = false;
199 bool FlatAddressSpace = false;
200 bool FlatInstOffsets = false;
201 bool FlatGlobalInsts = false;
202 bool FlatScratchInsts = false;
205 bool EnableFlatScratch = false;
207 bool HasGDS = false;
208 bool HasGWS = false;
209 bool AddNoCarryInsts = false;
210 bool HasUnpackedD16VMem = false;
211 bool LDSMisalignedBug = false;
214 bool UnalignedDSAccess = false;
215 bool HasPackedTID = false;
216 bool ScalarizeGlobal = false;
217 bool HasSALUFloatInsts = false;
221
228 bool HasNSAtoVMEMBug = false;
229 bool HasNSAClauseBug = false;
230 bool HasOffset3fBug = false;
236 bool Has1_5xVGPRs = false;
237 bool HasMADIntraFwdBug = false;
238 bool HasVOPDInsts = false;
242
243 bool RequiresCOV6 = false;
244
245 // Dummy feature to use for assembler in tablegen.
246 bool FeatureDisable = false;
247
249private:
250 SIInstrInfo InstrInfo;
251 SITargetLowering TLInfo;
252 SIFrameLowering FrameLowering;
253
254public:
255 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
256 const GCNTargetMachine &TM);
257 ~GCNSubtarget() override;
258
260 StringRef GPU, StringRef FS);
261
262 /// Diagnose inconsistent subtarget features before attempting to codegen
263 /// function \p F.
264 void checkSubtargetFeatures(const Function &F) const;
265
266 const SIInstrInfo *getInstrInfo() const override {
267 return &InstrInfo;
268 }
269
270 const SIFrameLowering *getFrameLowering() const override {
271 return &FrameLowering;
272 }
273
274 const SITargetLowering *getTargetLowering() const override {
275 return &TLInfo;
276 }
277
278 const SIRegisterInfo *getRegisterInfo() const override {
279 return &InstrInfo.getRegisterInfo();
280 }
281
282 const CallLowering *getCallLowering() const override {
283 return CallLoweringInfo.get();
284 }
285
286 const InlineAsmLowering *getInlineAsmLowering() const override {
287 return InlineAsmLoweringInfo.get();
288 }
289
291 return InstSelector.get();
292 }
293
294 const LegalizerInfo *getLegalizerInfo() const override {
295 return Legalizer.get();
296 }
297
298 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
299 return RegBankInfo.get();
300 }
301
303 return TargetID;
304 }
305
306 // Nothing implemented, just prevent crashes on use.
308 return &TSInfo;
309 }
310
312 return &InstrItins;
313 }
314
316
318 return (Generation)Gen;
319 }
320
321 unsigned getMaxWaveScratchSize() const {
322 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
323 if (getGeneration() >= GFX12) {
324 // 18-bit field in units of 64-dword.
325 return (64 * 4) * ((1 << 18) - 1);
326 }
327 if (getGeneration() == GFX11) {
328 // 15-bit field in units of 64-dword.
329 return (64 * 4) * ((1 << 15) - 1);
330 }
331 // 13-bit field in units of 256-dword.
332 return (256 * 4) * ((1 << 13) - 1);
333 }
334
335 /// Return the number of high bits known to be zero for a frame index.
338 }
339
340 int getLDSBankCount() const {
341 return LDSBankCount;
342 }
343
344 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
345 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
346 }
347
348 unsigned getConstantBusLimit(unsigned Opcode) const;
349
350 /// \returns true if an instruction that produces a 16-bit result in a 32-bit
351 /// register implicitly zeroes the high 16 bits of the destination, rather
352 /// than preserving their original value.
353 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
354
355 bool supportsWGP() const { return getGeneration() >= GFX10; }
356
357 bool hasIntClamp() const {
358 return HasIntClamp;
359 }
360
361 bool hasFP64() const {
362 return FP64;
363 }
364
365 bool hasMIMG_R128() const {
366 return MIMG_R128;
367 }
368
369 bool hasHWFP64() const {
370 return FP64;
371 }
372
373 bool hasHalfRate64Ops() const {
374 return HalfRate64Ops;
375 }
376
377 bool hasFullRate64Ops() const {
378 return FullRate64Ops;
379 }
380
381 bool hasAddr64() const {
383 }
384
385 bool hasFlat() const {
387 }
388
389 // Return true if the target only has the reverse operand versions of VALU
390 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
391 bool hasOnlyRevVALUShifts() const {
393 }
394
395 bool hasFractBug() const {
397 }
398
399 bool hasBFE() const {
400 return true;
401 }
402
403 bool hasBFI() const {
404 return true;
405 }
406
407 bool hasBFM() const {
408 return hasBFE();
409 }
410
411 bool hasBCNT(unsigned Size) const {
412 return true;
413 }
414
415 bool hasFFBL() const {
416 return true;
417 }
418
419 bool hasFFBH() const {
420 return true;
421 }
422
423 bool hasMed3_16() const {
425 }
426
427 bool hasMin3Max3_16() const {
429 }
430
431 bool hasFmaMixInsts() const {
432 return HasFmaMixInsts;
433 }
434
435 bool hasCARRY() const {
436 return true;
437 }
438
439 bool hasFMA() const {
440 return FMA;
441 }
442
443 bool hasSwap() const {
444 return GFX9Insts;
445 }
446
447 bool hasScalarPackInsts() const {
448 return GFX9Insts;
449 }
450
451 bool hasScalarMulHiInsts() const {
452 return GFX9Insts;
453 }
454
455 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
456
459 }
460
462 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
463 return getGeneration() >= GFX9;
464 }
465
466 /// True if the offset field of DS instructions works as expected. On SI, the
467 /// offset uses a 16-bit adder and does not always wrap properly.
468 bool hasUsableDSOffset() const {
469 return getGeneration() >= SEA_ISLANDS;
470 }
471
474 }
475
476 /// Condition output from div_scale is usable.
479 }
480
481 /// Extra wait hazard is needed in some cases before
482 /// s_cbranch_vccnz/s_cbranch_vccz.
483 bool hasReadVCCZBug() const {
484 return getGeneration() <= SEA_ISLANDS;
485 }
486
487 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
489 return getGeneration() >= GFX10;
490 }
491
492 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
493 /// was written by a VALU instruction.
496 }
497
498 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
499 /// SGPR was written by a VALU Instruction.
502 }
503
504 bool hasRFEHazards() const {
506 }
507
508 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
509 unsigned getSetRegWaitStates() const {
510 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
511 }
512
513 bool dumpCode() const {
514 return DumpCode;
515 }
516
517 /// Return the amount of LDS that can be used that will not restrict the
518 /// occupancy lower than WaveCount.
519 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
520 const Function &) const;
521
524 }
525
526 /// \returns If target supports S_DENORM_MODE.
527 bool hasDenormModeInst() const {
529 }
530
531 bool useFlatForGlobal() const {
532 return FlatForGlobal;
533 }
534
535 /// \returns If target supports ds_read/write_b128 and user enables generation
536 /// of ds_read/write_b128.
537 bool useDS128() const {
538 return CIInsts && EnableDS128;
539 }
540
541 /// \return If target supports ds_read/write_b96/128.
542 bool hasDS96AndDS128() const {
543 return CIInsts;
544 }
545
546 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
547 bool haveRoundOpsF64() const {
548 return CIInsts;
549 }
550
551 /// \returns If MUBUF instructions always perform range checking, even for
552 /// buffer resources used for private memory access.
555 }
556
557 /// \returns If target requires PRT Strict NULL support (zero result registers
558 /// for sparse texture support).
559 bool usePRTStrictNull() const {
560 return EnablePRTStrictNull;
561 }
562
565 }
566
567 /// \returns true if the target supports backing off of s_barrier instructions
568 /// when an exception is raised.
570 return BackOffBarrier;
571 }
572
575 }
576
579 }
580
581 bool hasUnalignedDSAccess() const {
582 return UnalignedDSAccess;
583 }
584
587 }
588
591 }
592
594 return UnalignedAccessMode;
595 }
596
597 bool hasApertureRegs() const {
598 return HasApertureRegs;
599 }
600
601 bool isTrapHandlerEnabled() const {
602 return TrapHandler;
603 }
604
605 bool isXNACKEnabled() const {
606 return TargetID.isXnackOnOrAny();
607 }
608
609 bool isTgSplitEnabled() const {
610 return EnableTgSplit;
611 }
612
613 bool isCuModeEnabled() const {
614 return EnableCuMode;
615 }
616
618
619 bool hasFlatAddressSpace() const {
620 return FlatAddressSpace;
621 }
622
623 bool hasFlatScrRegister() const {
624 return hasFlatAddressSpace();
625 }
626
627 bool hasFlatInstOffsets() const {
628 return FlatInstOffsets;
629 }
630
631 bool hasFlatGlobalInsts() const {
632 return FlatGlobalInsts;
633 }
634
635 bool hasFlatScratchInsts() const {
636 return FlatScratchInsts;
637 }
638
639 // Check if target supports ST addressing mode with FLAT scratch instructions.
640 // The ST addressing mode means no registers are used, either VGPR or SGPR,
641 // but only immediate offset is swizzled and added to the FLAT scratch base.
642 bool hasFlatScratchSTMode() const {
644 }
645
646 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
647
650 }
651
652 bool enableFlatScratch() const {
653 return flatScratchIsArchitected() ||
655 }
656
657 bool hasGlobalAddTidInsts() const {
658 return GFX10_BEncoding;
659 }
660
661 bool hasAtomicCSub() const {
662 return GFX10_BEncoding;
663 }
664
665 bool hasExportInsts() const {
666 return !hasGFX940Insts();
667 }
668
669 bool hasVINTERPEncoding() const {
670 return GFX11Insts;
671 }
672
673 // DS_ADD_F64/DS_ADD_RTN_F64
674 bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); }
675
677 return getGeneration() >= GFX9;
678 }
679
682 }
683
685 return getGeneration() > GFX9;
686 }
687
688 bool hasD16LoadStore() const {
689 return getGeneration() >= GFX9;
690 }
691
694 }
695
696 bool hasD16Images() const {
698 }
699
700 /// Return if most LDS instructions have an m0 use that requires m0 to be
701 /// initialized.
702 bool ldsRequiresM0Init() const {
703 return getGeneration() < GFX9;
704 }
705
706 // True if the hardware rewinds and replays GWS operations if a wave is
707 // preempted.
708 //
709 // If this is false, a GWS operation requires testing if a nack set the
710 // MEM_VIOL bit, and repeating if so.
711 bool hasGWSAutoReplay() const {
712 return getGeneration() >= GFX9;
713 }
714
715 /// \returns if target has ds_gws_sema_release_all instruction.
716 bool hasGWSSemaReleaseAll() const {
717 return CIInsts;
718 }
719
720 /// \returns true if the target has integer add/sub instructions that do not
721 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
722 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
723 /// for saturation.
724 bool hasAddNoCarry() const {
725 return AddNoCarryInsts;
726 }
727
728 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
729
730 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
731
732 bool hasUnpackedD16VMem() const {
733 return HasUnpackedD16VMem;
734 }
735
736 // Covers VS/PS/CS graphics shaders
737 bool isMesaGfxShader(const Function &F) const {
738 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
739 }
740
741 bool hasMad64_32() const {
742 return getGeneration() >= SEA_ISLANDS;
743 }
744
745 bool hasSDWAOmod() const {
746 return HasSDWAOmod;
747 }
748
749 bool hasSDWAScalar() const {
750 return HasSDWAScalar;
751 }
752
753 bool hasSDWASdst() const {
754 return HasSDWASdst;
755 }
756
757 bool hasSDWAMac() const {
758 return HasSDWAMac;
759 }
760
761 bool hasSDWAOutModsVOPC() const {
762 return HasSDWAOutModsVOPC;
763 }
764
765 bool hasDLInsts() const {
766 return HasDLInsts;
767 }
768
769 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
770
771 bool hasDot1Insts() const {
772 return HasDot1Insts;
773 }
774
775 bool hasDot2Insts() const {
776 return HasDot2Insts;
777 }
778
779 bool hasDot3Insts() const {
780 return HasDot3Insts;
781 }
782
783 bool hasDot4Insts() const {
784 return HasDot4Insts;
785 }
786
787 bool hasDot5Insts() const {
788 return HasDot5Insts;
789 }
790
791 bool hasDot6Insts() const {
792 return HasDot6Insts;
793 }
794
795 bool hasDot7Insts() const {
796 return HasDot7Insts;
797 }
798
799 bool hasDot8Insts() const {
800 return HasDot8Insts;
801 }
802
803 bool hasDot9Insts() const {
804 return HasDot9Insts;
805 }
806
807 bool hasDot10Insts() const {
808 return HasDot10Insts;
809 }
810
811 bool hasDot11Insts() const {
812 return HasDot11Insts;
813 }
814
815 bool hasMAIInsts() const {
816 return HasMAIInsts;
817 }
818
819 bool hasFP8Insts() const {
820 return HasFP8Insts;
821 }
822
824
825 bool hasPkFmacF16Inst() const {
826 return HasPkFmacF16Inst;
827 }
828
831 }
832
835 }
836
839 }
840
843 }
844
846
848
849 bool hasAtomicFaddInsts() const {
851 }
852
854
856
859 }
860
863 }
864
867 }
868
871 }
872
874
875 /// \return true if the target has flat, global, and buffer atomic fadd for
876 /// double.
879 }
880
881 /// \return true if the target's flat, global, and buffer atomic fadd for
882 /// float supports denormal handling.
885 }
886
887 /// \return true if atomic operations targeting fine-grained memory work
888 /// correctly at device scope, in allocations in host or peer PCIe device
889 /// memory.
892 }
893
895
898 }
899
900 bool hasNoSdstCMPX() const {
901 return HasNoSdstCMPX;
902 }
903
904 bool hasVscnt() const {
905 return HasVscnt;
906 }
907
908 bool hasGetWaveIdInst() const {
909 return HasGetWaveIdInst;
910 }
911
912 bool hasSMemTimeInst() const {
913 return HasSMemTimeInst;
914 }
915
918 }
919
922 }
923
924 bool hasVOP3Literal() const {
925 return HasVOP3Literal;
926 }
927
928 bool hasNoDataDepHazard() const {
929 return HasNoDataDepHazard;
930 }
931
933 return getGeneration() < SEA_ISLANDS;
934 }
935
936 bool hasInstPrefetch() const {
937 return getGeneration() == GFX10 || getGeneration() == GFX11;
938 }
939
940 bool hasPrefetch() const { return GFX12Insts; }
941
942 // Has s_cmpk_* instructions.
943 bool hasSCmpK() const { return getGeneration() < GFX12; }
944
945 // Scratch is allocated in 256 dword per wave blocks for the entire
946 // wavefront. When viewed from the perspective of an arbitrary workitem, this
947 // is 4-byte aligned.
948 //
949 // Only 4-byte alignment is really needed to access anything. Transformations
950 // on the pointer value itself may rely on the alignment / known low bits of
951 // the pointer. Set this to something above the minimum to avoid needing
952 // dynamic realignment in common cases.
953 Align getStackAlignment() const { return Align(16); }
954
955 bool enableMachineScheduler() const override {
956 return true;
957 }
958
959 bool useAA() const override;
960
961 bool enableSubRegLiveness() const override {
962 return true;
963 }
964
967
968 // static wrappers
969 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
970
971 // XXX - Why is this here if it isn't in the default pass set?
972 bool enableEarlyIfConversion() const override {
973 return true;
974 }
975
977 unsigned NumRegionInstrs) const override;
978
979 void mirFileLoaded(MachineFunction &MF) const override;
980
981 unsigned getMaxNumUserSGPRs() const {
982 return AMDGPU::getMaxNumUserSGPRs(*this);
983 }
984
985 bool hasSMemRealTime() const {
986 return HasSMemRealTime;
987 }
988
989 bool hasMovrel() const {
990 return HasMovrel;
991 }
992
993 bool hasVGPRIndexMode() const {
994 return HasVGPRIndexMode;
995 }
996
997 bool useVGPRIndexMode() const;
998
999 bool hasScalarCompareEq64() const {
1000 return getGeneration() >= VOLCANIC_ISLANDS;
1001 }
1002
1004
1005 bool hasScalarStores() const {
1006 return HasScalarStores;
1007 }
1008
1009 bool hasScalarAtomics() const {
1010 return HasScalarAtomics;
1011 }
1012
1013 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1014 bool hasLDSFPAtomicAddF64() const { return GFX90AInsts; }
1015
1016 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1017 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1018
1019 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1020 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1021
1022 bool hasDPP() const {
1023 return HasDPP;
1024 }
1025
1026 bool hasDPPBroadcasts() const {
1027 return HasDPP && getGeneration() < GFX10;
1028 }
1029
1031 return HasDPP && getGeneration() < GFX10;
1032 }
1033
1034 bool hasDPP8() const {
1035 return HasDPP8;
1036 }
1037
1038 bool hasDPALU_DPP() const {
1039 return HasDPALU_DPP;
1040 }
1041
1042 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1043
1044 bool hasPackedFP32Ops() const {
1045 return HasPackedFP32Ops;
1046 }
1047
1048 // Has V_PK_MOV_B32 opcode
1049 bool hasPkMovB32() const {
1050 return GFX90AInsts;
1051 }
1052
1054 return getGeneration() >= GFX10 || hasGFX940Insts();
1055 }
1056
1057 bool hasImageInsts() const {
1058 return HasImageInsts;
1059 }
1060
1062 return HasExtendedImageInsts;
1063 }
1064
1065 bool hasR128A16() const {
1066 return HasR128A16;
1067 }
1068
1069 bool hasA16() const { return HasA16; }
1070
1071 bool hasG16() const { return HasG16; }
1072
1073 bool hasOffset3fBug() const {
1074 return HasOffset3fBug;
1075 }
1076
1078
1080
1081 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1082
1084
1086
1087 bool hasNSAEncoding() const { return HasNSAEncoding; }
1088
1089 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1090
1092
1093 unsigned getNSAMaxSize(bool HasSampler = false) const {
1094 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1095 }
1096
1097 bool hasGFX10_AEncoding() const {
1098 return GFX10_AEncoding;
1099 }
1100
1101 bool hasGFX10_BEncoding() const {
1102 return GFX10_BEncoding;
1103 }
1104
1105 bool hasGFX10_3Insts() const {
1106 return GFX10_3Insts;
1107 }
1108
1109 bool hasMadF16() const;
1110
1111 bool hasMovB64() const { return GFX940Insts; }
1112
1113 bool hasLshlAddB64() const { return GFX940Insts; }
1114
1115 bool enableSIScheduler() const {
1116 return EnableSIScheduler;
1117 }
1118
1119 bool loadStoreOptEnabled() const {
1120 return EnableLoadStoreOpt;
1121 }
1122
1123 bool hasSGPRInitBug() const {
1124 return SGPRInitBug;
1125 }
1126
1128 return UserSGPRInit16Bug && isWave32();
1129 }
1130
1132
1135 }
1136
1139 }
1140
1143 }
1144
1145 /// \returns true if the subtarget supports DWORDX3 load/store instructions.
1147 return CIInsts;
1148 }
1149
1152 }
1153
1157 }
1158
1161 }
1162
1165 }
1166
1169 }
1170
1173 }
1174
1177 }
1178
1179 bool hasLDSMisalignedBug() const {
1180 return LDSMisalignedBug && !EnableCuMode;
1181 }
1182
1184 return HasInstFwdPrefetchBug;
1185 }
1186
1188 return HasVcmpxExecWARHazard;
1189 }
1190
1193 }
1194
1195 // Shift amount of a 64 bit shift cannot be the highest allocated register
1196 // if also at the end of the allocation block.
1198 return GFX90AInsts && !GFX940Insts;
1199 }
1200
1201 // Has one cycle hazard on transcendental instruction feeding a
1202 // non transcendental VALU.
1203 bool hasTransForwardingHazard() const { return GFX940Insts; }
1204
1205 // Has one cycle hazard on a VALU instruction partially writing dst with
1206 // a shift of result bits feeding another VALU instruction.
1208
1209 // Cannot use op_sel with v_dot instructions.
1210 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1211
1212 // Does not have HW interlocks for VALU writing and then reading SGPRs.
1213 bool hasVDecCoExecHazard() const {
1214 return GFX940Insts;
1215 }
1216
1217 bool hasNSAtoVMEMBug() const {
1218 return HasNSAtoVMEMBug;
1219 }
1220
1221 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1222
1223 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1224
1225 bool hasGFX90AInsts() const { return GFX90AInsts; }
1226
1228 return getGeneration() == GFX10;
1229 }
1230
1231 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1232
1233 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1234
1235 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1236
1238 return getGeneration() == GFX11;
1239 }
1240
1242
1244
1245 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1246
1247 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1248
1249 /// Return if operations acting on VGPR tuples require even alignment.
1250 bool needsAlignedVGPRs() const { return GFX90AInsts; }
1251
1252 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1253 bool hasSPackHL() const { return GFX11Insts; }
1254
1255 /// Return true if the target's EXP instruction has the COMPR flag, which
1256 /// affects the meaning of the EN (enable) bits.
1257 bool hasCompressedExport() const { return !GFX11Insts; }
1258
1259 /// Return true if the target's EXP instruction supports the NULL export
1260 /// target.
1261 bool hasNullExportTarget() const { return !GFX11Insts; }
1262
1263 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1264
1265 bool hasVOPDInsts() const { return HasVOPDInsts; }
1266
1268
1269 /// Return true if the target has the S_DELAY_ALU instruction.
1270 bool hasDelayAlu() const { return GFX11Insts; }
1271
1272 bool hasPackedTID() const { return HasPackedTID; }
1273
1274 // GFX940 is a derivative of GFX90A. hasGFX940Insts() being true implies that
1275 // hasGFX90AInsts() is also true.
1276 bool hasGFX940Insts() const { return GFX940Insts; }
1277
1278 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1279
1281
1283
1285
1287
1288 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1289 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1290 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1291
1292 /// \returns The maximum number of instructions that can be enclosed in an
1293 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1294 /// instruction.
1295 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1296
1297 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1298 /// SGPRs
1299 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1300
1301 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1302 /// VGPRs
1303 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1304
1305 /// Return occupancy for the given function. Takes the LDS size and the
1306 /// number of registers into account if they are provided.
1307 /// Note, occupancy can be affected by the scratch allocation as well, but
1308 /// we do not have enough information to compute it.
1309 unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1310 unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1311
1312 /// \returns true if the flat_scratch register should be initialized with the
1313 /// pointer to the wave's scratch memory rather than a size and offset.
1316 }
1317
1318 /// \returns true if the flat_scratch register is initialized by the HW.
1319 /// In this case it is readonly.
1321
1322 /// \returns true if the architected SGPRs are enabled.
1324
1325 /// \returns true if Global Data Share is supported.
1326 bool hasGDS() const { return HasGDS; }
1327
1328 /// \returns true if Global Wave Sync is supported.
1329 bool hasGWS() const { return HasGWS; }
1330
1331 /// \returns true if the machine has merged shaders in which s0-s7 are
1332 /// reserved by the hardware and user SGPRs start at s8
1333 bool hasMergedShaders() const {
1334 return getGeneration() >= GFX9;
1335 }
1336
1337 /// \returns true if the target supports the pre-NGG legacy geometry path.
1338 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1339
1340 /// \returns true if preloading kernel arguments is supported.
1341 bool hasKernargPreload() const { return KernargPreload; }
1342
1343 /// \returns true if the target has split barriers feature
1344 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1345
1346 /// \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1347 bool hasCvtFP8VOP1Bug() const { return true; }
1348
1349 /// \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1350 /// no-return form.
1352
1353 /// \returns true if the target has DX10_CLAMP kernel descriptor mode bit
1354 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1355
1356 /// \returns true if the target has IEEE kernel descriptor mode bit
1357 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1358
1359 /// \returns true if the target has IEEE fminimum/fmaximum instructions
1360 bool hasIEEEMinMax() const { return getGeneration() >= GFX12; }
1361
1362 /// \returns true if the target has IEEE fminimum3/fmaximum3 instructions
1363 bool hasIEEEMinMax3() const { return hasIEEEMinMax(); }
1364
1365 /// \returns true if the target has WG_RR_MODE kernel descriptor mode bit
1366 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1367
1368 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1369 /// values.
1370 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1371
1372 /// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1373 /// of sign-extending.
1374 bool hasGetPCZeroExtension() const { return GFX12Insts; }
1375
1376 /// \returns SGPR allocation granularity supported by the subtarget.
1377 unsigned getSGPRAllocGranule() const {
1379 }
1380
1381 /// \returns SGPR encoding granularity supported by the subtarget.
1382 unsigned getSGPREncodingGranule() const {
1384 }
1385
1386 /// \returns Total number of SGPRs supported by the subtarget.
1387 unsigned getTotalNumSGPRs() const {
1389 }
1390
1391 /// \returns Addressable number of SGPRs supported by the subtarget.
1392 unsigned getAddressableNumSGPRs() const {
1394 }
1395
1396 /// \returns Minimum number of SGPRs that meets the given number of waves per
1397 /// execution unit requirement supported by the subtarget.
1398 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1399 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1400 }
1401
1402 /// \returns Maximum number of SGPRs that meets the given number of waves per
1403 /// execution unit requirement supported by the subtarget.
1404 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1405 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1406 }
1407
1408 /// \returns Reserved number of SGPRs. This is common
1409 /// utility function called by MachineFunction and
1410 /// Function variants of getReservedNumSGPRs.
1411 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1412 /// \returns Reserved number of SGPRs for given machine function \p MF.
1413 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1414
1415 /// \returns Reserved number of SGPRs for given function \p F.
1416 unsigned getReservedNumSGPRs(const Function &F) const;
1417
1418 /// \returns max num SGPRs. This is the common utility
1419 /// function called by MachineFunction and Function
1420 /// variants of getMaxNumSGPRs.
1421 unsigned getBaseMaxNumSGPRs(const Function &F,
1422 std::pair<unsigned, unsigned> WavesPerEU,
1423 unsigned PreloadedSGPRs,
1424 unsigned ReservedNumSGPRs) const;
1425
1426 /// \returns Maximum number of SGPRs that meets number of waves per execution
1427 /// unit requirement for function \p MF, or number of SGPRs explicitly
1428 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1429 ///
1430 /// \returns Value that meets number of waves per execution unit requirement
1431 /// if explicitly requested value cannot be converted to integer, violates
1432 /// subtarget's specifications, or does not meet number of waves per execution
1433 /// unit requirement.
1434 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1435
1436 /// \returns Maximum number of SGPRs that meets number of waves per execution
1437 /// unit requirement for function \p F, or number of SGPRs explicitly
1438 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1439 ///
1440 /// \returns Value that meets number of waves per execution unit requirement
1441 /// if explicitly requested value cannot be converted to integer, violates
1442 /// subtarget's specifications, or does not meet number of waves per execution
1443 /// unit requirement.
1444 unsigned getMaxNumSGPRs(const Function &F) const;
1445
1446 /// \returns VGPR allocation granularity supported by the subtarget.
1447 unsigned getVGPRAllocGranule() const {
1449 }
1450
1451 /// \returns VGPR encoding granularity supported by the subtarget.
1452 unsigned getVGPREncodingGranule() const {
1454 }
1455
1456 /// \returns Total number of VGPRs supported by the subtarget.
1457 unsigned getTotalNumVGPRs() const {
1459 }
1460
1461 /// \returns Addressable number of architectural VGPRs supported by the
1462 /// subtarget.
1465 }
1466
1467 /// \returns Addressable number of VGPRs supported by the subtarget.
1468 unsigned getAddressableNumVGPRs() const {
1470 }
1471
1472 /// \returns the minimum number of VGPRs that will prevent achieving more than
1473 /// the specified number of waves \p WavesPerEU.
1474 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1475 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1476 }
1477
1478 /// \returns the maximum number of VGPRs that can be used and still achieved
1479 /// at least the specified number of waves \p WavesPerEU.
1480 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1481 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1482 }
1483
1484 /// \returns max num VGPRs. This is the common utility function
1485 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1486 unsigned getBaseMaxNumVGPRs(const Function &F,
1487 std::pair<unsigned, unsigned> WavesPerEU) const;
1488 /// \returns Maximum number of VGPRs that meets number of waves per execution
1489 /// unit requirement for function \p F, or number of VGPRs explicitly
1490 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1491 ///
1492 /// \returns Value that meets number of waves per execution unit requirement
1493 /// if explicitly requested value cannot be converted to integer, violates
1494 /// subtarget's specifications, or does not meet number of waves per execution
1495 /// unit requirement.
1496 unsigned getMaxNumVGPRs(const Function &F) const;
1497
1498 unsigned getMaxNumAGPRs(const Function &F) const {
1499 return getMaxNumVGPRs(F);
1500 }
1501
1502 /// \returns Maximum number of VGPRs that meets number of waves per execution
1503 /// unit requirement for function \p MF, or number of VGPRs explicitly
1504 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1505 ///
1506 /// \returns Value that meets number of waves per execution unit requirement
1507 /// if explicitly requested value cannot be converted to integer, violates
1508 /// subtarget's specifications, or does not meet number of waves per execution
1509 /// unit requirement.
1510 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1511
1512 void getPostRAMutations(
1513 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1514 const override;
1515
1516 std::unique_ptr<ScheduleDAGMutation>
1518
1519 bool isWave32() const {
1520 return getWavefrontSize() == 32;
1521 }
1522
1523 bool isWave64() const {
1524 return getWavefrontSize() == 64;
1525 }
1526
1528 return getRegisterInfo()->getBoolRC();
1529 }
1530
1531 /// \returns Maximum number of work groups per compute unit supported by the
1532 /// subtarget and limited by given \p FlatWorkGroupSize.
1533 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1534 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1535 }
1536
1537 /// \returns Minimum flat work group size supported by the subtarget.
1538 unsigned getMinFlatWorkGroupSize() const override {
1540 }
1541
1542 /// \returns Maximum flat work group size supported by the subtarget.
1543 unsigned getMaxFlatWorkGroupSize() const override {
1545 }
1546
1547 /// \returns Number of waves per execution unit required to support the given
1548 /// \p FlatWorkGroupSize.
1549 unsigned
1550 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1551 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1552 }
1553
1554 /// \returns Minimum number of waves per execution unit supported by the
1555 /// subtarget.
1556 unsigned getMinWavesPerEU() const override {
1558 }
1559
1560 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1561 SDep &Dep,
1562 const TargetSchedModel *SchedModel) const override;
1563
1564 // \returns true if it's beneficial on this subtarget for the scheduler to
1565 // cluster stores as well as loads.
1566 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1567
1568 // \returns the number of address arguments from which to enable MIMG NSA
1569 // on supported architectures.
1570 unsigned getNSAThreshold(const MachineFunction &MF) const;
1571
1572 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1573 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1575 // Currently all targets that support the dealloc VGPRs message also require
1576 // the nop.
1577 return true;
1578 }
1579};
1580
1582public:
1583 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1584
1585 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1586
1587 bool hasDispatchPtr() const { return DispatchPtr; }
1588
1589 bool hasQueuePtr() const { return QueuePtr; }
1590
1591 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1592
1593 bool hasDispatchID() const { return DispatchID; }
1594
1595 bool hasFlatScratchInit() const { return FlatScratchInit; }
1596
1597 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1598
1599 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1600
1601 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1602
1603 unsigned getNumFreeUserSGPRs();
1604
1605 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1606
1607 enum UserSGPRID : unsigned {
1617
1618 // Returns the size in number of SGPRs for preload user SGPR field.
1620 switch (ID) {
1622 return 2;
1624 return 4;
1625 case DispatchPtrID:
1626 return 2;
1627 case QueuePtrID:
1628 return 2;
1630 return 2;
1631 case DispatchIdID:
1632 return 2;
1633 case FlatScratchInitID:
1634 return 2;
1636 return 1;
1637 }
1638 llvm_unreachable("Unknown UserSGPRID.");
1639 }
1640
1641 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1642
1643private:
1644 const GCNSubtarget &ST;
1645
1646 // Private memory buffer
1647 // Compute directly in sgpr[0:1]
1648 // Other shaders indirect 64-bits at sgpr[0:1]
1649 bool ImplicitBufferPtr = false;
1650
1651 bool PrivateSegmentBuffer = false;
1652
1653 bool DispatchPtr = false;
1654
1655 bool QueuePtr = false;
1656
1657 bool KernargSegmentPtr = false;
1658
1659 bool DispatchID = false;
1660
1661 bool FlatScratchInit = false;
1662
1663 bool PrivateSegmentSize = false;
1664
1665 unsigned NumKernargPreloadSGPRs = 0;
1666
1667 unsigned NumUsedUserSGPRs = 0;
1668};
1669
1670} // end namespace llvm
1671
1672#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
const char LLVMTargetMachineRef TM
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
Definition: GCNSubtarget.h:940
bool hasMemoryAtomicFaddF32DenormalSupport() const
Definition: GCNSubtarget.h:883
bool hasFlat() const
Definition: GCNSubtarget.h:385
bool hasD16Images() const
Definition: GCNSubtarget.h:696
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:64
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:845
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:745
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:227
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:488
bool hasSwap() const
Definition: GCNSubtarget.h:443
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:825
bool HasAtomicFMinFMaxF64FlatInsts
Definition: GCNSubtarget.h:165
bool hasDot2Insts() const
Definition: GCNSubtarget.h:775
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:688
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:749
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:569
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:999
bool has1_5xVGPRs() const
int getLDSBankCount() const
Definition: GCNSubtarget.h:340
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:391
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:477
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:468
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:961
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
Definition: GCNSubtarget.h:841
bool hasLdsAtomicAddF64() const
Definition: GCNSubtarget.h:674
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:684
bool flatScratchIsPointer() const
bool hasSDWAMac() const
Definition: GCNSubtarget.h:757
bool hasFP8ConversionInsts() const
Definition: GCNSubtarget.h:823
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:795
bool hasApertureRegs() const
Definition: GCNSubtarget.h:597
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:66
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:472
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:627
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:932
bool hasAtomicFMinFMaxF32FlatInsts() const
Definition: GCNSubtarget.h:837
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
Definition: GCNSubtarget.h:894
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:908
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:965
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
bool hasDLInsts() const
Definition: GCNSubtarget.h:765
bool hasExtendedImageInsts() const
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:411
bool hasMAIInsts() const
Definition: GCNSubtarget.h:815
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
Definition: GCNSubtarget.h:890
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:635
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:676
bool hasArchitectedSGPRs() const
bool hasHWFP64() const
Definition: GCNSubtarget.h:369
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:527
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:912
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:585
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:266
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:75
bool hasDot1Insts() const
Definition: GCNSubtarget.h:771
bool hasDot3Insts() const
Definition: GCNSubtarget.h:779
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:286
bool HasVGPRSingleUseHintInsts
Definition: GCNSubtarget.h:218
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:563
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:853
unsigned getTotalNumSGPRs() const
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:311
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
bool HasShaderCyclesHiLoRegisters
Definition: GCNSubtarget.h:196
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
Definition: GCNSubtarget.h:953
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:553
bool hasScalarSubwordLoads() const
Definition: GCNSubtarget.h:455
bool hasDot11Insts() const
Definition: GCNSubtarget.h:811
bool enableFlatScratch() const
Definition: GCNSubtarget.h:652
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:573
bool hasR128A16() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasSignedScratchOffsets() const
bool HasPrivEnabledTrap2NopBug
Definition: GCNSubtarget.h:235
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:657
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:623
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:461
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:609
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:873
bool hasKernargPreload() const
bool hasFP8Insts() const
Definition: GCNSubtarget.h:819
unsigned getMaxNumAGPRs(const Function &F) const
unsigned getVGPRAllocGranule() const
bool hasReadM0MovRelInterpHazard() const
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:278
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
bool hasFmaakFmamkF32Insts() const
bool hasVscnt() const
Definition: GCNSubtarget.h:904
bool hasMad64_32() const
Definition: GCNSubtarget.h:741
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:290
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:117
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:537
bool hasExtendedWaitCounts() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:692
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:769
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:936
unsigned maxHardClauseLength() const
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:737
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
Definition: GCNSubtarget.h:665
bool hasDPP() const
bool hasVINTERPEncoding() const
Definition: GCNSubtarget.h:669
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:298
bool hasLegacyGeometry() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:457
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:613
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:270
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:589
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:307
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:761
bool hasAtomicFMinFMaxF32GlobalInsts() const
Definition: GCNSubtarget.h:829
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:302
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:648
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:513
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:928
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:581
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:427
bool hasIntClamp() const
Definition: GCNSubtarget.h:357
bool hasGFX10_AEncoding() const
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:680
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:509
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:274
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:791
bool hasGFX940Insts() const
bool hasLshlAddB64() const
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:377
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:601
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:955
bool hasLDSFPAtomicAddF64() const
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:167
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:631
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
Definition: GCNSubtarget.h:162
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:966
bool HasAtomicFMinFMaxF32FlatInsts
Definition: GCNSubtarget.h:164
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
Definition: GCNSubtarget.h:730
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:336
bool hasShaderCyclesHiLoRegisters() const
Definition: GCNSubtarget.h:920
bool hasSDWASdst() const
Definition: GCNSubtarget.h:753
bool HasDefaultComponentBroadcast
Definition: GCNSubtarget.h:180
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:447
bool hasFFBL() const
Definition: GCNSubtarget.h:415
bool hasNSAEncoding() const
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:985
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:619
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:559
bool hasMovB64() const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
Definition: GCNSubtarget.h:833
bool hasMed3_16() const
Definition: GCNSubtarget.h:423
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasMovrel() const
Definition: GCNSubtarget.h:989
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:847
bool hasBFI() const
Definition: GCNSubtarget.h:403
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
Definition: GCNSubtarget.h:702
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:224
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:993
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:172
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:577
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:344
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool HasRequiredExportPriority
Definition: GCNSubtarget.h:241
bool hasFMA() const
Definition: GCNSubtarget.h:439
bool hasDot10Insts() const
Definition: GCNSubtarget.h:807
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:522
bool HasAtomicBufferPkAddBF16Inst
Definition: GCNSubtarget.h:175
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
Definition: GCNSubtarget.h:419
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:646
bool supportsWGP() const
Definition: GCNSubtarget.h:355
bool hasG16() const
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:373
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:849
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:171
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:204
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:857
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:365
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
Definition: GCNSubtarget.h:869
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
Definition: GCNSubtarget.h:179
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
Definition: GCNSubtarget.h:787
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:981
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:855
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
Definition: GCNSubtarget.h:184
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
Definition: GCNSubtarget.h:877
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:407
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:547
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
Definition: GCNSubtarget.h:799
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:451
bool hasSCmpK() const
Definition: GCNSubtarget.h:943
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:294
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:542
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
Definition: GCNSubtarget.h:163
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:531
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:248
Generation getGeneration() const
Definition: GCNSubtarget.h:317
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasForceStoreSC0SC1() const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:924
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:861
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:900
unsigned getAddressableNumVGPRs() const
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:605
bool hasScalarAddSub64() const
Definition: GCNSubtarget.h:728
bool hasIEEEMinMax3() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:732
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:972
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:494
bool hasRFEHazards() const
Definition: GCNSubtarget.h:504
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:500
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:642
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:716
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:865
bool hasAddr64() const
Definition: GCNSubtarget.h:381
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:223
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:174
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:593
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:483
bool isWave64() const
bool hasIEEEMinMax() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:431
bool hasCARRY() const
Definition: GCNSubtarget.h:435
bool hasPackedTID() const
bool hasFP64() const
Definition: GCNSubtarget.h:361
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:724
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:916
bool hasSALUFloatInsts() const
bool hasVGPRSingleUseHintInsts() const
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:94
bool hasFractBug() const
Definition: GCNSubtarget.h:395
bool isPreciseMemoryEnabled() const
Definition: GCNSubtarget.h:617
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:321
bool HasMemoryAtomicFaddF32DenormalSupport
Definition: GCNSubtarget.h:170
bool hasDot4Insts() const
Definition: GCNSubtarget.h:783
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
~GCNSubtarget() override
bool hasDot9Insts() const
Definition: GCNSubtarget.h:803
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:661
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:62
bool hasDefaultComponentBroadcast() const
Definition: GCNSubtarget.h:896
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:282
bool hasBFE() const
Definition: GCNSubtarget.h:399
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:711
bool HasFlatBufferGlobalAtomicFaddF64Inst
Definition: GCNSubtarget.h:177
static unsigned getNumUserSGPRForField(UserSGPRID ID)
bool hasKernargSegmentPtr() const
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
bool hasImplicitBufferPtr() const
unsigned getNumKernargPreloadSGPRs() const
bool hasPrivateSegmentSize() const
unsigned getNumUsedUserSGPRs() const
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition: ScheduleDAG.h:49
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:222
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool isShader(CallingConv::ID cc)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.