LLVM 18.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
26
27#define GET_SUBTARGETINFO_HEADER
28#include "AMDGPUGenSubtargetInfo.inc"
29
30namespace llvm {
31
32class GCNTargetMachine;
33
35 public AMDGPUSubtarget {
36public:
38
39 // Following 2 enums are documented at:
40 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Selects the trap handler ABI. Two values are defined here: NONE (0x00)
/// and AMDHSA (0x01).
41 enum class TrapHandlerAbi {
42 NONE = 0x00,
43 AMDHSA = 0x01,
44 };
45
46 enum class TrapID {
47 LLVMAMDHSATrap = 0x02,
49 };
50
51private:
52 /// GlobalISel related APIs.
53 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
54 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
55 std::unique_ptr<InstructionSelector> InstSelector;
56 std::unique_ptr<LegalizerInfo> Legalizer;
57 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
58
59protected:
60 // Basic subtarget description.
63 unsigned Gen = INVALID;
65 int LDSBankCount = 0;
67
68 // Possibly statically set by tablegen, but may want to be overridden.
69 bool FastDenormalF32 = false;
70 bool HalfRate64Ops = false;
71 bool FullRate64Ops = false;
72
73 // Dynamically set bits that enable features.
74 bool FlatForGlobal = false;
76 bool BackOffBarrier = false;
78 bool UnalignedAccessMode = false;
79 bool HasApertureRegs = false;
80 bool SupportsXNACK = false;
81 bool KernargPreload = false;
82
83 // This should not be used directly. 'TargetID' tracks the dynamic settings
84 // for XNACK.
85 bool EnableXNACK = false;
86
87 bool EnableTgSplit = false;
88 bool EnableCuMode = false;
89 bool TrapHandler = false;
90
91 // Used as options.
92 bool EnableLoadStoreOpt = false;
94 bool EnableSIScheduler = false;
95 bool EnableDS128 = false;
96 bool EnablePRTStrictNull = false;
97 bool DumpCode = false;
98
99 // Subtarget properties statically set by tablegen
100 bool FP64 = false;
101 bool FMA = false;
102 bool MIMG_R128 = false;
103 bool CIInsts = false;
104 bool GFX8Insts = false;
105 bool GFX9Insts = false;
106 bool GFX90AInsts = false;
107 bool GFX940Insts = false;
108 bool GFX10Insts = false;
109 bool GFX11Insts = false;
110 bool GFX12Insts = false;
111 bool GFX10_3Insts = false;
112 bool GFX7GFX8GFX9Insts = false;
113 bool SGPRInitBug = false;
114 bool UserSGPRInit16Bug = false;
117 bool HasSMemRealTime = false;
118 bool HasIntClamp = false;
119 bool HasFmaMixInsts = false;
120 bool HasMovrel = false;
121 bool HasVGPRIndexMode = false;
122 bool HasScalarStores = false;
123 bool HasScalarAtomics = false;
124 bool HasSDWAOmod = false;
125 bool HasSDWAScalar = false;
126 bool HasSDWASdst = false;
127 bool HasSDWAMac = false;
128 bool HasSDWAOutModsVOPC = false;
129 bool HasDPP = false;
130 bool HasDPP8 = false;
131 bool HasDPALU_DPP = false;
132 bool HasDPPSrc1SGPR = false;
133 bool HasPackedFP32Ops = false;
134 bool HasImageInsts = false;
136 bool HasR128A16 = false;
137 bool HasA16 = false;
138 bool HasG16 = false;
139 bool HasNSAEncoding = false;
141 bool GFX10_AEncoding = false;
142 bool GFX10_BEncoding = false;
143 bool HasDLInsts = false;
144 bool HasFmacF64Inst = false;
145 bool HasDot1Insts = false;
146 bool HasDot2Insts = false;
147 bool HasDot3Insts = false;
148 bool HasDot4Insts = false;
149 bool HasDot5Insts = false;
150 bool HasDot6Insts = false;
151 bool HasDot7Insts = false;
152 bool HasDot8Insts = false;
153 bool HasDot9Insts = false;
154 bool HasDot10Insts = false;
155 bool HasMAIInsts = false;
156 bool HasFP8Insts = false;
157 bool HasPkFmacF16Inst = false;
167 bool SupportsSRAMECC = false;
168
169 // This should not be used directly. 'TargetID' tracks the dynamic settings
170 // for SRAMECC.
171 bool EnableSRAMECC = false;
172
173 bool HasNoSdstCMPX = false;
174 bool HasVscnt = false;
175 bool HasGetWaveIdInst = false;
176 bool HasSMemTimeInst = false;
178 bool HasVOP3Literal = false;
179 bool HasNoDataDepHazard = false;
180 bool FlatAddressSpace = false;
181 bool FlatInstOffsets = false;
182 bool FlatGlobalInsts = false;
183 bool FlatScratchInsts = false;
186 bool EnableFlatScratch = false;
188 bool HasGDS = false;
189 bool HasGWS = false;
190 bool AddNoCarryInsts = false;
191 bool HasUnpackedD16VMem = false;
192 bool LDSMisalignedBug = false;
195 bool UnalignedDSAccess = false;
196 bool HasPackedTID = false;
197 bool ScalarizeGlobal = false;
198 bool HasSALUFloatInsts = false;
200
207 bool HasNSAtoVMEMBug = false;
208 bool HasNSAClauseBug = false;
209 bool HasOffset3fBug = false;
214 bool HasGFX11FullVGPRs = false;
215 bool HasMADIntraFwdBug = false;
216 bool HasVOPDInsts = false;
219
220 // Dummy feature to use for assembler in tablegen.
221 bool FeatureDisable = false;
222
224private:
225 SIInstrInfo InstrInfo;
226 SITargetLowering TLInfo;
227 SIFrameLowering FrameLowering;
228
229public:
230 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
231 const GCNTargetMachine &TM);
232 ~GCNSubtarget() override;
233
235 StringRef GPU, StringRef FS);
236
/// \returns a pointer to the SIInstrInfo member embedded in this subtarget.
237 const SIInstrInfo *getInstrInfo() const override {
238 return &InstrInfo;
239 }
240
/// \returns a pointer to the SIFrameLowering member embedded in this
/// subtarget.
241 const SIFrameLowering *getFrameLowering() const override {
242 return &FrameLowering;
243 }
244
/// \returns a pointer to the SITargetLowering member (TLInfo) embedded in
/// this subtarget.
245 const SITargetLowering *getTargetLowering() const override {
246 return &TLInfo;
247 }
248
/// \returns the address of the register info object exposed by the embedded
/// SIInstrInfo; no separate register info member is consulted here.
249 const SIRegisterInfo *getRegisterInfo() const override {
250 return &InstrInfo.getRegisterInfo();
251 }
252
/// \returns the raw pointer held by CallLoweringInfo. Ownership stays with
/// the subtarget's unique_ptr.
253 const CallLowering *getCallLowering() const override {
254 return CallLoweringInfo.get();
255 }
256
/// \returns the raw pointer held by InlineAsmLoweringInfo. Ownership stays
/// with the subtarget's unique_ptr.
257 const InlineAsmLowering *getInlineAsmLowering() const override {
258 return InlineAsmLoweringInfo.get();
259 }
260
262 return InstSelector.get();
263 }
264
/// \returns the raw pointer held by Legalizer. Ownership stays with the
/// subtarget's unique_ptr.
265 const LegalizerInfo *getLegalizerInfo() const override {
266 return Legalizer.get();
267 }
268
/// \returns the raw pointer held by RegBankInfo. Ownership stays with the
/// subtarget's unique_ptr.
269 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
270 return RegBankInfo.get();
271 }
272
274 return TargetID;
275 }
276
277 // Nothing implemented, just prevent crashes on use.
279 return &TSInfo;
280 }
281
283 return &InstrItins;
284 }
285
287
289 return (Generation)Gen;
290 }
291
/// \returns the largest per-wave scratch size: the maximum value of the
/// WAVESIZE field multiplied by the size of one allocation unit. The unit is
/// 256 dwords before GFX11 and 64 dwords from GFX11 on; the `* 4` factor
/// presumably converts dwords to bytes (NOTE(review): confirm the unit).
292 unsigned getMaxWaveScratchSize() const {
293 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
294 if (getGeneration() < GFX11) {
295 // 13-bit field in units of 256-dword.
296 return (256 * 4) * ((1 << 13) - 1);
297 }
298 // 15-bit field in units of 64-dword.
299 return (64 * 4) * ((1 << 15) - 1);
300 }
301
302 /// Return the number of high bits known to be zero for a frame index.
305 }
306
/// \returns the LDSBankCount member (initialized to 0 in this class).
307 int getLDSBankCount() const {
308 return LDSBankCount;
309 }
310
/// \returns MaxPrivateElementSize when \p ForBufferRSrc is true or flat
/// scratch is not enabled; otherwise the fixed value 16.
311 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
312 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
313 }
314
315 unsigned getConstantBusLimit(unsigned Opcode) const;
316
317 /// Returns if the result of this instruction with a 16-bit result returned in
318 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
319 /// the original value.
320 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
321
/// \returns true when the subtarget generation is GFX10 or later.
322 bool supportsWGP() const { return getGeneration() >= GFX10; }
323
/// \returns the HasIntClamp feature flag.
324 bool hasIntClamp() const {
325 return HasIntClamp;
326 }
327
/// \returns the FP64 feature flag.
328 bool hasFP64() const {
329 return FP64;
330 }
331
/// \returns the MIMG_R128 feature flag.
332 bool hasMIMG_R128() const {
333 return MIMG_R128;
334 }
335
/// \returns the FP64 feature flag; as written this is identical to
/// hasFP64().
336 bool hasHWFP64() const {
337 return FP64;
338 }
339
/// \returns the HalfRate64Ops flag.
340 bool hasHalfRate64Ops() const {
341 return HalfRate64Ops;
342 }
343
/// \returns the FullRate64Ops flag.
344 bool hasFullRate64Ops() const {
345 return FullRate64Ops;
346 }
347
348 bool hasAddr64() const {
350 }
351
352 bool hasFlat() const {
354 }
355
356 // Return true if the target only has the reverse operand versions of VALU
357 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
358 bool hasOnlyRevVALUShifts() const {
360 }
361
362 bool hasFractBug() const {
364 }
365
/// Unconditionally true for every GCN subtarget.
366 bool hasBFE() const {
367 return true;
368 }
369
/// Unconditionally true for every GCN subtarget.
370 bool hasBFI() const {
371 return true;
372 }
373
/// Mirrors hasBFE(), so it is also always true here.
374 bool hasBFM() const {
375 return hasBFE();
376 }
377
/// Unconditionally true; the \p Size argument is not inspected.
378 bool hasBCNT(unsigned Size) const {
379 return true;
380 }
381
/// Unconditionally true for every GCN subtarget.
382 bool hasFFBL() const {
383 return true;
384 }
385
/// Unconditionally true for every GCN subtarget.
386 bool hasFFBH() const {
387 return true;
388 }
389
390 bool hasMed3_16() const {
392 }
393
394 bool hasMin3Max3_16() const {
396 }
397
/// \returns the HasFmaMixInsts feature flag.
398 bool hasFmaMixInsts() const {
399 return HasFmaMixInsts;
400 }
401
/// Unconditionally true for every GCN subtarget.
402 bool hasCARRY() const {
403 return true;
404 }
405
/// \returns the FMA feature flag.
406 bool hasFMA() const {
407 return FMA;
408 }
409
/// Keyed on the GFX9Insts feature flag.
410 bool hasSwap() const {
411 return GFX9Insts;
412 }
413
/// Keyed on the GFX9Insts feature flag.
414 bool hasScalarPackInsts() const {
415 return GFX9Insts;
416 }
417
/// Keyed on the GFX9Insts feature flag.
418 bool hasScalarMulHiInsts() const {
419 return GFX9Insts;
420 }
421
424 }
425
427 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
428 return getGeneration() >= GFX9;
429 }
430
431 /// True if the offset field of DS instructions works as expected. On SI, the
432 /// offset uses a 16-bit adder and does not always wrap properly.
433 bool hasUsableDSOffset() const {
434 return getGeneration() >= SEA_ISLANDS;
435 }
436
439 }
440
441 /// Condition output from div_scale is usable.
444 }
445
446 /// Extra wait hazard is needed in some cases before
447 /// s_cbranch_vccnz/s_cbranch_vccz.
448 bool hasReadVCCZBug() const {
449 return getGeneration() <= SEA_ISLANDS;
450 }
451
452 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
454 return getGeneration() >= GFX10;
455 }
456
457 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
458 /// was written by a VALU instruction.
461 }
462
463 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
464 /// SGPR was written by a VALU Instruction.
467 }
468
469 bool hasRFEHazards() const {
471 }
472
473 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
474 unsigned getSetRegWaitStates() const {
// One wait state up to and including SEA_ISLANDS, two on later generations.
475 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
476 }
477
/// \returns the DumpCode option flag.
478 bool dumpCode() const {
479 return DumpCode;
480 }
481
482 /// Return the amount of LDS that can be used that will not restrict the
483 /// occupancy lower than WaveCount.
484 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
485 const Function &) const;
486
489 }
490
491 /// \returns If target supports S_DENORM_MODE.
492 bool hasDenormModeInst() const {
494 }
495
/// \returns the FlatForGlobal flag (one of the dynamically set feature bits).
496 bool useFlatForGlobal() const {
497 return FlatForGlobal;
498 }
499
500 /// \returns If target supports ds_read/write_b128 and user enables generation
501 /// of ds_read/write_b128.
502 bool useDS128() const {
503 return CIInsts && EnableDS128;
504 }
505
506 /// \return If target supports ds_read/write_b96/128.
507 bool hasDS96AndDS128() const {
508 return CIInsts;
509 }
510
511 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
512 bool haveRoundOpsF64() const {
513 return CIInsts;
514 }
515
516 /// \returns If MUBUF instructions always perform range checking, even for
517 /// buffer resources used for private memory access.
520 }
521
522 /// \returns If target requires PRT Strict NULL support (zero result registers
523 /// for sparse texture support).
524 bool usePRTStrictNull() const {
525 return EnablePRTStrictNull;
526 }
527
530 }
531
532 /// \returns true if the target supports backing off of s_barrier instructions
533 /// when an exception is raised.
535 return BackOffBarrier;
536 }
537
540 }
541
544 }
545
/// \returns the UnalignedDSAccess feature flag.
546 bool hasUnalignedDSAccess() const {
547 return UnalignedDSAccess;
548 }
549
552 }
553
556 }
557
559 return UnalignedAccessMode;
560 }
561
/// \returns the HasApertureRegs feature flag.
562 bool hasApertureRegs() const {
563 return HasApertureRegs;
564 }
565
/// \returns the TrapHandler flag.
566 bool isTrapHandlerEnabled() const {
567 return TrapHandler;
568 }
569
/// Asks TargetID whether XNACK is "on" or "any". The EnableXNACK member is
/// deliberately not read here; its own comment says TargetID tracks the
/// dynamic XNACK setting.
570 bool isXNACKEnabled() const {
571 return TargetID.isXnackOnOrAny();
572 }
573
/// \returns the EnableTgSplit flag.
574 bool isTgSplitEnabled() const {
575 return EnableTgSplit;
576 }
577
/// \returns the EnableCuMode flag.
578 bool isCuModeEnabled() const {
579 return EnableCuMode;
580 }
581
/// \returns the FlatAddressSpace feature flag.
582 bool hasFlatAddressSpace() const {
583 return FlatAddressSpace;
584 }
585
/// Mirrors hasFlatAddressSpace().
586 bool hasFlatScrRegister() const {
587 return hasFlatAddressSpace();
588 }
589
/// \returns the FlatInstOffsets feature flag.
590 bool hasFlatInstOffsets() const {
591 return FlatInstOffsets;
592 }
593
/// \returns the FlatGlobalInsts feature flag.
594 bool hasFlatGlobalInsts() const {
595 return FlatGlobalInsts;
596 }
597
/// \returns the FlatScratchInsts feature flag.
598 bool hasFlatScratchInsts() const {
599 return FlatScratchInsts;
600 }
601
602 // Check if target supports ST addressing mode with FLAT scratch instructions.
603 // The ST addressing mode means no registers are used, either VGPR or SGPR,
604 // but only immediate offset is swizzled and added to the FLAT scratch base.
605 bool hasFlatScratchSTMode() const {
607 }
608
/// \returns true when either the GFX940Insts or the GFX11Insts flag is set.
609 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
610
613 }
614
615 bool enableFlatScratch() const {
616 return flatScratchIsArchitected() ||
618 }
619
/// Keyed on the GFX10_BEncoding flag.
620 bool hasGlobalAddTidInsts() const {
621 return GFX10_BEncoding;
622 }
623
/// Keyed on the GFX10_BEncoding flag.
624 bool hasAtomicCSub() const {
625 return GFX10_BEncoding;
626 }
627
629 return getGeneration() >= GFX9;
630 }
631
634 }
635
637 return getGeneration() > GFX9;
638 }
639
/// \returns true when the subtarget generation is GFX9 or later.
640 bool hasD16LoadStore() const {
641 return getGeneration() >= GFX9;
642 }
643
646 }
647
648 bool hasD16Images() const {
650 }
651
652 /// Return if most LDS instructions have an m0 use that require m0 to be
653 /// initialized.
654 bool ldsRequiresM0Init() const {
655 return getGeneration() < GFX9;
656 }
657
658 // True if the hardware rewinds and replays GWS operations if a wave is
659 // preempted.
660 //
661 // If this is false, a GWS operation requires testing if a nack set the
662 // MEM_VIOL bit, and repeating if so.
663 bool hasGWSAutoReplay() const {
664 return getGeneration() >= GFX9;
665 }
666
667 /// \returns if target has ds_gws_sema_release_all instruction.
668 bool hasGWSSemaReleaseAll() const {
669 return CIInsts;
670 }
671
672 /// \returns true if the target has integer add/sub instructions that do not
673 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
674 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
675 /// for saturation.
676 bool hasAddNoCarry() const {
677 return AddNoCarryInsts;
678 }
679
/// \returns the HasUnpackedD16VMem feature flag.
680 bool hasUnpackedD16VMem() const {
681 return HasUnpackedD16VMem;
682 }
683
684 // Covers VS/PS/CS graphics shaders
// True only when the OS is Mesa3D and the calling convention of \p F is
// classified as a shader by AMDGPU::isShader.
685 bool isMesaGfxShader(const Function &F) const {
686 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
687 }
688
/// \returns true when the subtarget generation is SEA_ISLANDS or later.
689 bool hasMad64_32() const {
690 return getGeneration() >= SEA_ISLANDS;
691 }
692
/// \returns the HasSDWAOmod feature flag.
693 bool hasSDWAOmod() const {
694 return HasSDWAOmod;
695 }
696
/// \returns the HasSDWAScalar feature flag.
697 bool hasSDWAScalar() const {
698 return HasSDWAScalar;
699 }
700
/// \returns the HasSDWASdst feature flag.
701 bool hasSDWASdst() const {
702 return HasSDWASdst;
703 }
704
/// \returns the HasSDWAMac feature flag.
705 bool hasSDWAMac() const {
706 return HasSDWAMac;
707 }
708
/// \returns the HasSDWAOutModsVOPC feature flag.
709 bool hasSDWAOutModsVOPC() const {
710 return HasSDWAOutModsVOPC;
711 }
712
/// \returns the HasDLInsts feature flag.
713 bool hasDLInsts() const {
714 return HasDLInsts;
715 }
716
/// \returns the HasFmacF64Inst feature flag.
717 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
718
// The ten accessors below each return the matching HasDot<N>Insts feature
// flag unchanged.
719 bool hasDot1Insts() const {
720 return HasDot1Insts;
721 }
722
723 bool hasDot2Insts() const {
724 return HasDot2Insts;
725 }
726
727 bool hasDot3Insts() const {
728 return HasDot3Insts;
729 }
730
731 bool hasDot4Insts() const {
732 return HasDot4Insts;
733 }
734
735 bool hasDot5Insts() const {
736 return HasDot5Insts;
737 }
738
739 bool hasDot6Insts() const {
740 return HasDot6Insts;
741 }
742
743 bool hasDot7Insts() const {
744 return HasDot7Insts;
745 }
746
747 bool hasDot8Insts() const {
748 return HasDot8Insts;
749 }
750
751 bool hasDot9Insts() const {
752 return HasDot9Insts;
753 }
754
755 bool hasDot10Insts() const {
756 return HasDot10Insts;
757 }
758
/// \returns the HasMAIInsts feature flag.
759 bool hasMAIInsts() const {
760 return HasMAIInsts;
761 }
762
/// \returns the HasFP8Insts feature flag.
763 bool hasFP8Insts() const {
764 return HasFP8Insts;
765 }
766
/// \returns the HasPkFmacF16Inst feature flag.
767 bool hasPkFmacF16Inst() const {
768 return HasPkFmacF16Inst;
769 }
770
772
774
775 bool hasAtomicFaddInsts() const {
777 }
778
780
782
785 }
786
789 }
790
793 }
794
796
/// \returns the HasNoSdstCMPX feature flag.
797 bool hasNoSdstCMPX() const {
798 return HasNoSdstCMPX;
799 }
800
/// \returns the HasVscnt feature flag.
801 bool hasVscnt() const {
802 return HasVscnt;
803 }
804
/// \returns the HasGetWaveIdInst feature flag.
805 bool hasGetWaveIdInst() const {
806 return HasGetWaveIdInst;
807 }
808
/// \returns the HasSMemTimeInst feature flag.
809 bool hasSMemTimeInst() const {
810 return HasSMemTimeInst;
811 }
812
815 }
816
/// \returns the HasVOP3Literal feature flag.
817 bool hasVOP3Literal() const {
818 return HasVOP3Literal;
819 }
820
/// \returns the HasNoDataDepHazard feature flag.
821 bool hasNoDataDepHazard() const {
822 return HasNoDataDepHazard;
823 }
824
826 return getGeneration() < SEA_ISLANDS;
827 }
828
/// \returns true when the subtarget generation is GFX10 or later.
829 bool hasInstPrefetch() const { return getGeneration() >= GFX10; }
830
831 // Scratch is allocated in 256 dword per wave blocks for the entire
832 // wavefront. When viewed from the perspective of an arbitrary workitem, this
833 // is 4-byte aligned.
834 //
835 // Only 4-byte alignment is really needed to access anything. Transformations
836 // on the pointer value itself may rely on the alignment / known low bits of
837 // the pointer. Set this to something above the minimum to avoid needing
838 // dynamic realignment in common cases.
839 Align getStackAlignment() const { return Align(16); }
840
/// Always opts in to the machine scheduler.
841 bool enableMachineScheduler() const override {
842 return true;
843 }
844
/// Declared here; the definition is not part of this header.
845 bool useAA() const override;
846
/// Always opts in to sub-register liveness tracking.
847 bool enableSubRegLiveness() const override {
848 return true;
849 }
850
853
854 // static wrappers
855 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
856
857 // XXX - Why is this here if it isn't in the default pass set?
// Unconditionally reports early if-conversion as enabled.
858 bool enableEarlyIfConversion() const override {
859 return true;
860 }
861
863 unsigned NumRegionInstrs) const override;
864
/// Forwards to AMDGPU::getMaxNumUserSGPRs, passing this subtarget.
865 unsigned getMaxNumUserSGPRs() const {
866 return AMDGPU::getMaxNumUserSGPRs(*this);
867 }
868
/// \returns the HasSMemRealTime feature flag.
869 bool hasSMemRealTime() const {
870 return HasSMemRealTime;
871 }
872
/// \returns the HasMovrel feature flag.
873 bool hasMovrel() const {
874 return HasMovrel;
875 }
876
/// \returns the HasVGPRIndexMode feature flag. This reports availability
/// only; useVGPRIndexMode() is a separate query.
877 bool hasVGPRIndexMode() const {
878 return HasVGPRIndexMode;
879 }
880
881 bool useVGPRIndexMode() const;
882
883 bool hasScalarCompareEq64() const {
885 }
886
/// \returns the HasScalarStores feature flag.
887 bool hasScalarStores() const {
888 return HasScalarStores;
889 }
890
/// \returns the HasScalarAtomics feature flag.
891 bool hasScalarAtomics() const {
892 return HasScalarAtomics;
893 }
894
/// Keyed on the GFX8Insts feature flag.
895 bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
896
897 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
898 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
899
900 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
901 bool hasPermLane64() const { return getGeneration() >= GFX11; }
902
/// \returns the HasDPP feature flag.
903 bool hasDPP() const {
904 return HasDPP;
905 }
906
/// \returns true only when HasDPP is set and the generation is older than
/// GFX10.
907 bool hasDPPBroadcasts() const {
908 return HasDPP && getGeneration() < GFX10;
909 }
910
912 return HasDPP && getGeneration() < GFX10;
913 }
914
/// \returns the HasDPP8 feature flag.
915 bool hasDPP8() const {
916 return HasDPP8;
917 }
918
/// \returns the HasDPALU_DPP feature flag.
919 bool hasDPALU_DPP() const {
920 return HasDPALU_DPP;
921 }
922
/// \returns the HasDPPSrc1SGPR feature flag.
923 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
924
/// \returns the HasPackedFP32Ops feature flag.
925 bool hasPackedFP32Ops() const {
926 return HasPackedFP32Ops;
927 }
928
929 // Has V_PK_MOV_B32 opcode
// Keyed on the GFX90AInsts feature flag.
930 bool hasPkMovB32() const {
931 return GFX90AInsts;
932 }
933
935 return getGeneration() >= GFX10 || hasGFX940Insts();
936 }
937
/// \returns the HasImageInsts feature flag.
938 bool hasImageInsts() const {
939 return HasImageInsts;
940 }
941
944 }
945
/// \returns the HasR128A16 feature flag.
946 bool hasR128A16() const {
947 return HasR128A16;
948 }
949
/// \returns the HasA16 feature flag.
950 bool hasA16() const { return HasA16; }
951
/// \returns the HasG16 feature flag.
952 bool hasG16() const { return HasG16; }
953
/// \returns the HasOffset3fBug flag.
954 bool hasOffset3fBug() const {
955 return HasOffset3fBug;
956 }
957
959
961
962 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
963
965
966 bool hasNSAEncoding() const { return HasNSAEncoding; }
967
969
/// Forwards to AMDGPU::getNSAMaxSize with this subtarget and \p HasSampler
/// (false by default).
970 unsigned getNSAMaxSize(bool HasSampler = false) const {
971 return AMDGPU::getNSAMaxSize(*this, HasSampler);
972 }
973
/// \returns the GFX10_AEncoding feature flag.
974 bool hasGFX10_AEncoding() const {
975 return GFX10_AEncoding;
976 }
977
/// \returns the GFX10_BEncoding feature flag.
978 bool hasGFX10_BEncoding() const {
979 return GFX10_BEncoding;
980 }
981
/// \returns the GFX10_3Insts feature flag.
982 bool hasGFX10_3Insts() const {
983 return GFX10_3Insts;
984 }
985
986 bool hasMadF16() const;
987
/// Keyed on the GFX940Insts feature flag.
988 bool hasMovB64() const { return GFX940Insts; }
989
/// Keyed on the GFX940Insts feature flag.
990 bool hasLshlAddB64() const { return GFX940Insts; }
991
/// \returns the EnableSIScheduler option flag.
992 bool enableSIScheduler() const {
993 return EnableSIScheduler;
994 }
995
/// \returns the EnableLoadStoreOpt option flag.
996 bool loadStoreOptEnabled() const {
997 return EnableLoadStoreOpt;
998 }
999
/// \returns the SGPRInitBug flag.
1000 bool hasSGPRInitBug() const {
1001 return SGPRInitBug;
1002 }
1003
1005 return UserSGPRInit16Bug && isWave32();
1006 }
1007
1009
1012 }
1013
1016 }
1017
1020 }
1021
1022 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1024 return CIInsts;
1025 }
1026
1029 }
1030
1034 }
1035
1038 }
1039
1042 }
1043
1046 }
1047
1050 }
1051
1054 }
1055
/// \returns true only when the LDSMisalignedBug flag is set and CU mode is
/// not enabled.
1056 bool hasLDSMisalignedBug() const {
1057 return LDSMisalignedBug && !EnableCuMode;
1058 }
1059
1061 return HasInstFwdPrefetchBug;
1062 }
1063
1065 return HasVcmpxExecWARHazard;
1066 }
1067
1070 }
1071
1072 // Shift amount of a 64 bit shift cannot be a highest allocated register
1073 // if also at the end of the allocation block.
1075 return GFX90AInsts && !GFX940Insts;
1076 }
1077
1078 // Has one cycle hazard on transcendental instruction feeding a
1079 // non transcendental VALU.
1080 bool hasTransForwardingHazard() const { return GFX940Insts; }
1081
1082 // Has one cycle hazard on a VALU instruction partially writing dst with
1083 // a shift of result bits feeding another VALU instruction.
1085
1086 // Cannot use op_sel with v_dot instructions.
1087 bool hasDOTOpSelHazard() const { return GFX940Insts; }
1088
1089 // Does not have HW interlocks for VALU writing and then reading SGPRs.
1090 bool hasVDecCoExecHazard() const {
1091 return GFX940Insts;
1092 }
1093
/// \returns the HasNSAtoVMEMBug flag.
1094 bool hasNSAtoVMEMBug() const {
1095 return HasNSAtoVMEMBug;
1096 }
1097
/// \returns the HasNSAClauseBug flag.
1098 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1099
/// \returns true when the subtarget generation is GFX10 or later.
1100 bool hasHardClauses() const { return getGeneration() >= GFX10; }
1101
/// \returns the GFX90AInsts feature flag.
1102 bool hasGFX90AInsts() const { return GFX90AInsts; }
1103
1105 return getGeneration() == GFX10;
1106 }
1107
/// \returns true when the subtarget generation is GFX11 or later.
1108 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1109
/// \returns true when the subtarget generation is GFX11 or later.
1110 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1111
1113 return getGeneration() >= GFX11;
1114 }
1115
1117
1119
1120 bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; }
1121
1122 /// Return if operations acting on VGPR tuples require even alignment.
1123 bool needsAlignedVGPRs() const { return GFX90AInsts; }
1124
1125 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1126 bool hasSPackHL() const { return GFX11Insts; }
1127
1128 /// Return true if the target's EXP instruction has the COMPR flag, which
1129 /// affects the meaning of the EN (enable) bits.
1130 bool hasCompressedExport() const { return !GFX11Insts; }
1131
1132 /// Return true if the target's EXP instruction supports the NULL export
1133 /// target.
1134 bool hasNullExportTarget() const { return !GFX11Insts; }
1135
1136 bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; }
1137
1138 bool hasVOPDInsts() const { return HasVOPDInsts; }
1139
1141
1142 /// Return true if the target has the S_DELAY_ALU instruction.
1143 bool hasDelayAlu() const { return GFX11Insts; }
1144
1145 bool hasPackedTID() const { return HasPackedTID; }
1146
1147 // GFX940 is a derivative of GFX90A. hasGFX940Insts() being true implies that
1148 // hasGFX90AInsts is also true.
1149 bool hasGFX940Insts() const { return GFX940Insts; }
1150
1151 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1152
1154
1155 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1156 /// SGPRs
1157 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1158
1159 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1160 /// VGPRs
1161 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1162
1163 /// Return occupancy for the given function. Uses the LDS size and the
1164 /// register counts if provided.
1165 /// Note, occupancy can be affected by the scratch allocation as well, but
1166 /// we do not have enough information to compute it.
1167 unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1168 unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1169
1170 /// \returns true if the flat_scratch register should be initialized with the
1171 /// pointer to the wave's scratch memory rather than a size and offset.
1174 }
1175
1176 /// \returns true if the flat_scratch register is initialized by the HW.
1177 /// In this case it is readonly.
1179
1180 /// \returns true if the architected SGPRs are enabled.
1182
1183 /// \returns true if Global Data Share is supported.
1184 bool hasGDS() const { return HasGDS; }
1185
1186 /// \returns true if Global Wave Sync is supported.
1187 bool hasGWS() const { return HasGWS; }
1188
1189 /// \returns true if the machine has merged shaders in which s0-s7 are
1190 /// reserved by the hardware and user SGPRs start at s8
1191 bool hasMergedShaders() const {
1192 return getGeneration() >= GFX9;
1193 }
1194
1195 // \returns true if the target supports the pre-NGG legacy geometry path.
1196 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1197
1198 // \returns true if preloading kernel arguments is supported.
1199 bool hasKernargPreload() const { return KernargPreload; }
1200
1201 // \returns true if we need to generate backwards compatible code when
1202 // preloading kernel arguments.
1204 return hasKernargPreload() && !hasGFX940Insts();
1205 }
1206
1207 // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1208 bool hasCvtFP8VOP1Bug() const { return true; }
1209
1210 // \returns true if CSUB atomics support a no-return form.
1212
1213 /// \returns SGPR allocation granularity supported by the subtarget.
1214 unsigned getSGPRAllocGranule() const {
1216 }
1217
1218 /// \returns SGPR encoding granularity supported by the subtarget.
1219 unsigned getSGPREncodingGranule() const {
1221 }
1222
1223 /// \returns Total number of SGPRs supported by the subtarget.
1224 unsigned getTotalNumSGPRs() const {
1226 }
1227
1228 /// \returns Addressable number of SGPRs supported by the subtarget.
1229 unsigned getAddressableNumSGPRs() const {
1231 }
1232
1233 /// \returns Minimum number of SGPRs that meets the given number of waves per
1234 /// execution unit requirement supported by the subtarget.
1235 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1236 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1237 }
1238
1239 /// \returns Maximum number of SGPRs that meets the given number of waves per
1240 /// execution unit requirement supported by the subtarget.
1241 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1242 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1243 }
1244
1245 /// \returns Reserved number of SGPRs. This is common
1246 /// utility function called by MachineFunction and
1247 /// Function variants of getReservedNumSGPRs.
1248 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1249 /// \returns Reserved number of SGPRs for given machine function \p MF.
1250 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1251
1252 /// \returns Reserved number of SGPRs for given function \p F.
1253 unsigned getReservedNumSGPRs(const Function &F) const;
1254
1255 /// \returns max num SGPRs. This is the common utility
1256 /// function called by MachineFunction and Function
1257 /// variants of getMaxNumSGPRs.
1258 unsigned getBaseMaxNumSGPRs(const Function &F,
1259 std::pair<unsigned, unsigned> WavesPerEU,
1260 unsigned PreloadedSGPRs,
1261 unsigned ReservedNumSGPRs) const;
1262
1263 /// \returns Maximum number of SGPRs that meets number of waves per execution
1264 /// unit requirement for function \p MF, or number of SGPRs explicitly
1265 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1266 ///
1267 /// \returns Value that meets number of waves per execution unit requirement
1268 /// if explicitly requested value cannot be converted to integer, violates
1269 /// subtarget's specifications, or does not meet number of waves per execution
1270 /// unit requirement.
1271 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1272
1273 /// \returns Maximum number of SGPRs that meets number of waves per execution
1274 /// unit requirement for function \p F, or number of SGPRs explicitly
1275 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1276 ///
1277 /// \returns Value that meets number of waves per execution unit requirement
1278 /// if explicitly requested value cannot be converted to integer, violates
1279 /// subtarget's specifications, or does not meet number of waves per execution
1280 /// unit requirement.
1281 unsigned getMaxNumSGPRs(const Function &F) const;
1282
1283 /// \returns VGPR allocation granularity supported by the subtarget.
1284 unsigned getVGPRAllocGranule() const {
1286 }
1287
1288 /// \returns VGPR encoding granularity supported by the subtarget.
1289 unsigned getVGPREncodingGranule() const {
1291 }
1292
1293 /// \returns Total number of VGPRs supported by the subtarget.
1294 unsigned getTotalNumVGPRs() const {
1296 }
1297
1298 /// \returns Addressable number of VGPRs supported by the subtarget.
1299 unsigned getAddressableNumVGPRs() const {
1301 }
1302
1303 /// \returns the minimum number of VGPRs that will prevent achieving more than
1304 /// the specified number of waves \p WavesPerEU.
1305 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1306 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1307 }
1308
1309 /// \returns the maximum number of VGPRs that can be used and still achieve
1310 /// at least the specified number of waves \p WavesPerEU.
1311 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1312 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1313 }
1314
1315 /// \returns max num VGPRs. This is the common utility function
1316 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1317 unsigned getBaseMaxNumVGPRs(const Function &F,
1318 std::pair<unsigned, unsigned> WavesPerEU) const;
1319 /// \returns Maximum number of VGPRs that meets number of waves per execution
1320 /// unit requirement for function \p F, or number of VGPRs explicitly
1321 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1322 ///
1323 /// \returns Value that meets number of waves per execution unit requirement
1324 /// if explicitly requested value cannot be converted to integer, violates
1325 /// subtarget's specifications, or does not meet number of waves per execution
1326 /// unit requirement.
1327 unsigned getMaxNumVGPRs(const Function &F) const;
1328
/// \returns the same limit as getMaxNumVGPRs(\p F); no separate AGPR limit is
/// computed here.
1329 unsigned getMaxNumAGPRs(const Function &F) const {
1330 return getMaxNumVGPRs(F);
1331 }
1332
1333 /// \returns Maximum number of VGPRs that meets number of waves per execution
1334 /// unit requirement for function \p MF, or number of VGPRs explicitly
1335 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1336 ///
1337 /// \returns Value that meets number of waves per execution unit requirement
1338 /// if explicitly requested value cannot be converted to integer, violates
1339 /// subtarget's specifications, or does not meet number of waves per execution
1340 /// unit requirement.
1341 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1342
1343 void getPostRAMutations(
1344 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1345 const override;
1346
1347 std::unique_ptr<ScheduleDAGMutation>
1349
/// \returns true when getWavefrontSize() is exactly 32.
1350 bool isWave32() const {
1351 return getWavefrontSize() == 32;
1352 }
1353
/// \returns true when getWavefrontSize() is exactly 64.
1354 bool isWave64() const {
1355 return getWavefrontSize() == 64;
1356 }
1357
1359 return getRegisterInfo()->getBoolRC();
1360 }
1361
1362 /// \returns Maximum number of work groups per compute unit supported by the
1363 /// subtarget and limited by given \p FlatWorkGroupSize.
1364 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1365 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1366 }
1367
1368 /// \returns Minimum flat work group size supported by the subtarget.
1369 unsigned getMinFlatWorkGroupSize() const override {
1371 }
1372
1373 /// \returns Maximum flat work group size supported by the subtarget.
1374 unsigned getMaxFlatWorkGroupSize() const override {
1376 }
1377
1378 /// \returns Number of waves per execution unit required to support the given
1379 /// \p FlatWorkGroupSize.
1380 unsigned
1381 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1382 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1383 }
1384
1385 /// \returns Minimum number of waves per execution unit supported by the
1386 /// subtarget.
1387 unsigned getMinWavesPerEU() const override {
1389 }
1390
1391 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1392 SDep &Dep) const override;
1393
1394 // \returns true if it's beneficial on this subtarget for the scheduler to
1395 // cluster stores as well as loads.
1396 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1397
1398 // \returns the number of address arguments from which to enable MIMG NSA
1399 // on supported architectures.
1400 unsigned getNSAThreshold(const MachineFunction &MF) const;
1401
1402 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1403 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1405 // Currently all targets that support the dealloc VGPRs message also require
1406 // the nop.
1407 return true;
1408 }
1409};
1410
1412public:
// Read-only accessors for the private members of the same name declared
// further down in this class; each returns the stored value unchanged.
1413 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1414
1415 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1416
1417 bool hasDispatchPtr() const { return DispatchPtr; }
1418
1419 bool hasQueuePtr() const { return QueuePtr; }
1420
1421 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1422
1423 bool hasDispatchID() const { return DispatchID; }
1424
1425 bool hasFlatScratchInit() const { return FlatScratchInit; }
1426
1427 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1428
1429 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1430
1431 unsigned getNumFreeUserSGPRs();
1432
1433 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1434
1435 enum UserSGPRID : unsigned {
1445
1446 // Returns the size in number of SGPRs for preload user SGPR field.
1448 switch (ID) {
1450 return 2;
1452 return 4;
1453 case DispatchPtrID:
1454 return 2;
1455 case QueuePtrID:
1456 return 2;
1458 return 2;
1459 case DispatchIdID:
1460 return 2;
1461 case FlatScratchInitID:
1462 return 2;
1464 return 1;
1465 }
1466 llvm_unreachable("Unknown UserSGPRID.");
1467 }
1468
1469 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1470
1471private:
1472 const GCNSubtarget &ST;
1473
1474 // Private memory buffer
1475 // Compute directly in sgpr[0:1]
1476 // Other shaders indirect 64-bits at sgpr[0:1]
1477 bool ImplicitBufferPtr = false;
1478
1479 bool PrivateSegmentBuffer = false;
1480
1481 bool DispatchPtr = false;
1482
1483 bool QueuePtr = false;
1484
1485 bool KernargSegmentPtr = false;
1486
1487 bool DispatchID = false;
1488
1489 bool FlatScratchInit = false;
1490
1491 unsigned NumKernargPreloadSGPRs = 0;
1492
1493 unsigned NumUsedUserSGPRs = 0;
1494};
1495
1496} // end namespace llvm
1497
1498#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
const char LLVMTargetMachineRef TM
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
static constexpr uint32_t Opcode
Definition: aarch32.h:200
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasFlat() const
Definition: GCNSubtarget.h:352
bool hasD16Images() const
Definition: GCNSubtarget.h:648
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:64
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:771
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:693
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:206
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:453
bool hasSwap() const
Definition: GCNSubtarget.h:410
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:767
bool hasDot2Insts() const
Definition: GCNSubtarget.h:723
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:640
bool hasMergedShaders() const
bool hasA16() const
Definition: GCNSubtarget.h:950
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:697
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:534
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:883
int getLDSBankCount() const
Definition: GCNSubtarget.h:307
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:358
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:958
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:442
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:433
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:996
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:847
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:911
unsigned getSGPRAllocGranule() const
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:636
bool flatScratchIsPointer() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
bool hasSDWAMac() const
Definition: GCNSubtarget.h:705
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:743
bool hasApertureRegs() const
Definition: GCNSubtarget.h:562
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:66
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:437
bool hasFPAtomicToDenormModeHazard() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:590
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:825
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:805
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:851
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
bool hasDLInsts() const
Definition: GCNSubtarget.h:713
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:942
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:378
bool hasMAIInsts() const
Definition: GCNSubtarget.h:759
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:598
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:628
bool hasArchitectedSGPRs() const
bool hasHWFP64() const
Definition: GCNSubtarget.h:336
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:492
bool hasMFMAInlineLiteralBug() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:809
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:550
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:237
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:75
bool hasDot1Insts() const
Definition: GCNSubtarget.h:719
bool hasDot3Insts() const
Definition: GCNSubtarget.h:727
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
Definition: GCNSubtarget.h:962
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:257
bool HasVGPRSingleUseHintInsts
Definition: GCNSubtarget.h:199
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:528
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:779
unsigned getTotalNumSGPRs() const
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:282
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
Definition: GCNSubtarget.h:930
bool needsAlignedVGPRs() const
Return whether operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:982
Align getStackAlignment() const
Definition: GCNSubtarget.h:839
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:518
bool enableFlatScratch() const
Definition: GCNSubtarget.h:615
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:538
bool hasR128A16() const
Definition: GCNSubtarget.h:946
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:954
bool hasDwordx3LoadStores() const
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:620
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:586
bool hasPermLane64() const
Definition: GCNSubtarget.h:901
bool requiresNopBeforeDeallocVGPRs() const
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:426
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:574
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:795
bool hasKernargPreload() const
bool hasFP8Insts() const
Definition: GCNSubtarget.h:763
unsigned getMaxNumAGPRs(const Function &F) const
unsigned getVGPRAllocGranule() const
bool hasReadM0MovRelInterpHazard() const
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:249
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasDOTOpSelHazard() const
bool hasMSAALoadDstSelBug() const
Definition: GCNSubtarget.h:964
const TargetRegisterClass * getBoolRC() const
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:934
bool hasVscnt() const
Definition: GCNSubtarget.h:801
bool hasMad64_32() const
Definition: GCNSubtarget.h:689
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:261
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:116
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:502
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:644
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:717
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:829
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:685
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasDPP() const
Definition: GCNSubtarget.h:903
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:269
bool hasLegacyGeometry() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:422
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:578
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:891
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:241
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:554
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:278
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:709
bool hasGFX11FullVGPRs() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:273
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:611
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasLDSFPAtomicAdd() const
Definition: GCNSubtarget.h:895
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:478
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:821
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:546
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:394
bool hasIntClamp() const
Definition: GCNSubtarget.h:324
bool hasGFX10_AEncoding() const
Definition: GCNSubtarget.h:974
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:632
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:474
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:245
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:925
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:739
bool hasGFX940Insts() const
bool hasLshlAddB64() const
Definition: GCNSubtarget.h:990
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:344
bool hasScalarStores() const
Definition: GCNSubtarget.h:887
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:566
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:841
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:159
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:594
unsigned getNSAThreshold(const MachineFunction &MF) const
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:852
bool hasReadM0LdsDmaHazard() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:303
bool hasSDWASdst() const
Definition: GCNSubtarget.h:701
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:414
bool hasFFBL() const
Definition: GCNSubtarget.h:382
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:966
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:869
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:582
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:907
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:524
bool hasMovB64() const
Definition: GCNSubtarget.h:988
bool hasInstFwdPrefetchBug() const
bool hasMed3_16() const
Definition: GCNSubtarget.h:390
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasMovrel() const
Definition: GCNSubtarget.h:873
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:773
bool needsKernargPreloadBackwardsCompatibility() const
bool hasBFI() const
Definition: GCNSubtarget.h:370
bool ldsRequiresM0Init() const
Return whether most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:654
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:203
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:877
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:163
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:542
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using `VGPRs` VGPRs.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:311
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
Definition: GCNSubtarget.h:938
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:960
bool hasFMA() const
Definition: GCNSubtarget.h:406
bool hasDot10Insts() const
Definition: GCNSubtarget.h:755
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:487
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
Definition: GCNSubtarget.h:386
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:609
bool supportsWGP() const
Definition: GCNSubtarget.h:322
bool hasG16() const
Definition: GCNSubtarget.h:952
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:340
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:775
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:162
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:185
unsigned getNSAMaxSize(bool HasSampler=false) const
Definition: GCNSubtarget.h:970
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:783
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:332
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using `SGPRs` SGPRs.
bool hasVOP3DPP() const
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
Definition: GCNSubtarget.h:915
bool hasDot5Insts() const
Definition: GCNSubtarget.h:735
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:865
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:781
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:898
bool hasFlatScratchSVSSwizzleBug() const
bool hasVDecCoExecHazard() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:374
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:512
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
Definition: GCNSubtarget.h:747
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:418
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:265
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:507
bool hasGWS() const
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:496
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:978
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:223
Generation getGeneration() const
Definition: GCNSubtarget.h:288
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasForceStoreSC0SC1() const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:817
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:787
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:797
unsigned getAddressableNumVGPRs() const
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:570
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:680
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:858
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:459
bool hasRFEHazards() const
Definition: GCNSubtarget.h:469
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:465
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:605
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:668
bool hasDPALU_DPP() const
Definition: GCNSubtarget.h:919
bool enableSIScheduler() const
Definition: GCNSubtarget.h:992
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:791
bool hasAddr64() const
Definition: GCNSubtarget.h:348
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:202
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:165
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:558
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:448
bool isWave64() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:398
bool hasCARRY() const
Definition: GCNSubtarget.h:402
bool hasPackedTID() const
bool hasFP64() const
Definition: GCNSubtarget.h:328
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:676
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:813
bool hasSALUFloatInsts() const
bool hasVGPRSingleUseHintInsts() const
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:93
bool hasFractBug() const
Definition: GCNSubtarget.h:362
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool hasDPPSrc1SGPR() const
Definition: GCNSubtarget.h:923
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:292
bool hasDot4Insts() const
Definition: GCNSubtarget.h:731
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
Definition: GCNSubtarget.h:968
~GCNSubtarget() override
bool hasDot9Insts() const
Definition: GCNSubtarget.h:751
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:624
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:62
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:253
bool hasBFE() const
Definition: GCNSubtarget.h:366
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:663
static unsigned getNumUserSGPRForField(UserSGPRID ID)
bool hasKernargSegmentPtr() const
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
bool hasImplicitBufferPtr() const
unsigned getNumKernargPreloadSGPRs() const
unsigned getNumUsedUserSGPRs() const
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition: ScheduleDAG.h:49
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:212
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool isShader(CallingConv::ID cc)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.