LLVM 17.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
24
25#define GET_SUBTARGETINFO_HEADER
26#include "AMDGPUGenSubtargetInfo.inc"
27
28namespace llvm {
29
30class GCNTargetMachine;
31
33 public AMDGPUSubtarget {
34public:
36
37 // Following 2 enums are documented at:
38 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI selector; NONE is 0 and AMDHSA is 1.
enum class TrapHandlerAbi { NONE = 0, AMDHSA = 1 };
43
44 enum class TrapID {
45 LLVMAMDHSATrap = 0x02,
47 };
48
49private:
50 /// GlobalISel related APIs.
51 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
52 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
53 std::unique_ptr<InstructionSelector> InstSelector;
54 std::unique_ptr<LegalizerInfo> Legalizer;
55 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
56
57protected:
58 // Basic subtarget description.
61 unsigned Gen = INVALID;
63 int LDSBankCount = 0;
65
66 // Possibly statically set by tablegen, but may want to be overridden.
67 bool FastFMAF32 = false;
68 bool FastDenormalF32 = false;
69 bool HalfRate64Ops = false;
70 bool FullRate64Ops = false;
71
72 // Dynamically set bits that enable features.
73 bool FlatForGlobal = false;
75 bool BackOffBarrier = false;
77 bool UnalignedAccessMode = false;
78 bool HasApertureRegs = false;
79 bool SupportsXNACK = false;
80
81 // This should not be used directly. 'TargetID' tracks the dynamic settings
82 // for XNACK.
83 bool EnableXNACK = false;
84
85 bool EnableTgSplit = false;
86 bool EnableCuMode = false;
87 bool TrapHandler = false;
88
89 // Used as options.
90 bool EnableLoadStoreOpt = false;
92 bool EnableSIScheduler = false;
93 bool EnableDS128 = false;
94 bool EnablePRTStrictNull = false;
95 bool DumpCode = false;
96
97 // Static subtarget properties set by tablegen.
98 bool FP64 = false;
99 bool FMA = false;
100 bool MIMG_R128 = false;
101 bool CIInsts = false;
102 bool GFX8Insts = false;
103 bool GFX9Insts = false;
104 bool GFX90AInsts = false;
105 bool GFX940Insts = false;
106 bool GFX10Insts = false;
107 bool GFX11Insts = false;
108 bool GFX10_3Insts = false;
109 bool GFX7GFX8GFX9Insts = false;
110 bool SGPRInitBug = false;
111 bool UserSGPRInit16Bug = false;
114 bool HasSMemRealTime = false;
115 bool HasIntClamp = false;
116 bool HasFmaMixInsts = false;
117 bool HasMovrel = false;
118 bool HasVGPRIndexMode = false;
119 bool HasScalarStores = false;
120 bool HasScalarAtomics = false;
121 bool HasSDWAOmod = false;
122 bool HasSDWAScalar = false;
123 bool HasSDWASdst = false;
124 bool HasSDWAMac = false;
125 bool HasSDWAOutModsVOPC = false;
126 bool HasDPP = false;
127 bool HasDPP8 = false;
128 bool Has64BitDPP = false;
129 bool HasPackedFP32Ops = false;
130 bool HasImageInsts = false;
132 bool HasR128A16 = false;
133 bool HasA16 = false;
134 bool HasG16 = false;
135 bool HasNSAEncoding = false;
136 unsigned NSAMaxSize = 0;
137 bool GFX10_AEncoding = false;
138 bool GFX10_BEncoding = false;
139 bool HasDLInsts = false;
140 bool HasFmacF64Inst = false;
141 bool HasDot1Insts = false;
142 bool HasDot2Insts = false;
143 bool HasDot3Insts = false;
144 bool HasDot4Insts = false;
145 bool HasDot5Insts = false;
146 bool HasDot6Insts = false;
147 bool HasDot7Insts = false;
148 bool HasDot8Insts = false;
149 bool HasDot9Insts = false;
150 bool HasDot10Insts = false;
151 bool HasMAIInsts = false;
152 bool HasFP8Insts = false;
153 bool HasPkFmacF16Inst = false;
158 bool SupportsSRAMECC = false;
159
160 // This should not be used directly. 'TargetID' tracks the dynamic settings
161 // for SRAMECC.
162 bool EnableSRAMECC = false;
163
164 bool HasNoSdstCMPX = false;
165 bool HasVscnt = false;
166 bool HasGetWaveIdInst = false;
167 bool HasSMemTimeInst = false;
169 bool HasVOP3Literal = false;
170 bool HasNoDataDepHazard = false;
171 bool FlatAddressSpace = false;
172 bool FlatInstOffsets = false;
173 bool FlatGlobalInsts = false;
174 bool FlatScratchInsts = false;
177 bool EnableFlatScratch = false;
178 bool AddNoCarryInsts = false;
179 bool HasUnpackedD16VMem = false;
180 bool LDSMisalignedBug = false;
183 bool UnalignedDSAccess = false;
184 bool HasPackedTID = false;
185 bool ScalarizeGlobal = false;
186
193 bool HasNSAtoVMEMBug = false;
194 bool HasNSAClauseBug = false;
195 bool HasOffset3fBug = false;
199 bool HasGFX11FullVGPRs = false;
200 bool HasMADIntraFwdBug = false;
201 bool HasVOPDInsts = false;
203
204 // Dummy feature to use for assembler in tablegen.
205 bool FeatureDisable = false;
206
208private:
209 SIInstrInfo InstrInfo;
210 SITargetLowering TLInfo;
211 SIFrameLowering FrameLowering;
212
213public:
214 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
215 const GCNTargetMachine &TM);
216 ~GCNSubtarget() override;
217
219 StringRef GPU, StringRef FS);
220
221 const SIInstrInfo *getInstrInfo() const override {
222 return &InstrInfo;
223 }
224
225 const SIFrameLowering *getFrameLowering() const override {
226 return &FrameLowering;
227 }
228
229 const SITargetLowering *getTargetLowering() const override {
230 return &TLInfo;
231 }
232
233 const SIRegisterInfo *getRegisterInfo() const override {
234 return &InstrInfo.getRegisterInfo();
235 }
236
237 const CallLowering *getCallLowering() const override {
238 return CallLoweringInfo.get();
239 }
240
241 const InlineAsmLowering *getInlineAsmLowering() const override {
242 return InlineAsmLoweringInfo.get();
243 }
244
246 return InstSelector.get();
247 }
248
249 const LegalizerInfo *getLegalizerInfo() const override {
250 return Legalizer.get();
251 }
252
253 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
254 return RegBankInfo.get();
255 }
256
258 return TargetID;
259 }
260
261 // Nothing implemented, just prevent crashes on use.
263 return &TSInfo;
264 }
265
267 return &InstrItins;
268 }
269
271
273 return (Generation)Gen;
274 }
275
276 unsigned getMaxWaveScratchSize() const {
277 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
278 if (getGeneration() < GFX11) {
279 // 13-bit field in units of 256-dword.
280 return (256 * 4) * ((1 << 13) - 1);
281 }
282 // 15-bit field in units of 64-dword.
283 return (64 * 4) * ((1 << 15) - 1);
284 }
285
286 /// Return the number of high bits known to be zero for a frame index.
289 }
290
291 int getLDSBankCount() const {
292 return LDSBankCount;
293 }
294
295 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
296 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
297 }
298
299 unsigned getConstantBusLimit(unsigned Opcode) const;
300
301 /// Returns if the result of this instruction with a 16-bit result returned in
302 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
303 /// the original value.
304 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
305
306 bool supportsWGP() const { return getGeneration() >= GFX10; }
307
308 bool hasIntClamp() const {
309 return HasIntClamp;
310 }
311
312 bool hasFP64() const {
313 return FP64;
314 }
315
316 bool hasMIMG_R128() const {
317 return MIMG_R128;
318 }
319
320 bool hasHWFP64() const {
321 return FP64;
322 }
323
324 bool hasFastFMAF32() const {
325 return FastFMAF32;
326 }
327
328 bool hasHalfRate64Ops() const {
329 return HalfRate64Ops;
330 }
331
332 bool hasFullRate64Ops() const {
333 return FullRate64Ops;
334 }
335
336 bool hasAddr64() const {
338 }
339
340 bool hasFlat() const {
342 }
343
344 // Return true if the target only has the reverse operand versions of VALU
345 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
346 bool hasOnlyRevVALUShifts() const {
348 }
349
350 bool hasFractBug() const {
352 }
353
354 bool hasBFE() const {
355 return true;
356 }
357
358 bool hasBFI() const {
359 return true;
360 }
361
362 bool hasBFM() const {
363 return hasBFE();
364 }
365
366 bool hasBCNT(unsigned Size) const {
367 return true;
368 }
369
370 bool hasFFBL() const {
371 return true;
372 }
373
374 bool hasFFBH() const {
375 return true;
376 }
377
378 bool hasMed3_16() const {
380 }
381
382 bool hasMin3Max3_16() const {
384 }
385
386 bool hasFmaMixInsts() const {
387 return HasFmaMixInsts;
388 }
389
390 bool hasCARRY() const {
391 return true;
392 }
393
394 bool hasFMA() const {
395 return FMA;
396 }
397
398 bool hasSwap() const {
399 return GFX9Insts;
400 }
401
402 bool hasScalarPackInsts() const {
403 return GFX9Insts;
404 }
405
406 bool hasScalarMulHiInsts() const {
407 return GFX9Insts;
408 }
409
412 }
413
415 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
416 return getGeneration() >= GFX9;
417 }
418
419 /// True if the offset field of DS instructions works as expected. On SI, the
420 /// offset uses a 16-bit adder and does not always wrap properly.
421 bool hasUsableDSOffset() const {
422 return getGeneration() >= SEA_ISLANDS;
423 }
424
427 }
428
429 /// Condition output from div_scale is usable.
432 }
433
434 /// Extra wait hazard is needed in some cases before
435 /// s_cbranch_vccnz/s_cbranch_vccz.
436 bool hasReadVCCZBug() const {
437 return getGeneration() <= SEA_ISLANDS;
438 }
439
440 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
442 return getGeneration() >= GFX10;
443 }
444
445 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
446 /// was written by a VALU instruction.
449 }
450
451 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
452 /// SGPR was written by a VALU Instruction.
455 }
456
457 bool hasRFEHazards() const {
459 }
460
461 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
462 unsigned getSetRegWaitStates() const {
463 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
464 }
465
466 bool dumpCode() const {
467 return DumpCode;
468 }
469
470 /// Return the amount of LDS that can be used that will not restrict the
471 /// occupancy lower than WaveCount.
472 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
473 const Function &) const;
474
477 }
478
479 /// \returns If target supports S_DENORM_MODE.
480 bool hasDenormModeInst() const {
482 }
483
484 bool useFlatForGlobal() const {
485 return FlatForGlobal;
486 }
487
488 /// \returns If target supports ds_read/write_b128 and user enables generation
489 /// of ds_read/write_b128.
490 bool useDS128() const {
491 return CIInsts && EnableDS128;
492 }
493
494 /// \return If target supports ds_read/write_b96/128.
495 bool hasDS96AndDS128() const {
496 return CIInsts;
497 }
498
499 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
500 bool haveRoundOpsF64() const {
501 return CIInsts;
502 }
503
504 /// \returns If MUBUF instructions always perform range checking, even for
505 /// buffer resources used for private memory access.
508 }
509
510 /// \returns If target requires PRT Struct NULL support (zero result registers
511 /// for sparse texture support).
512 bool usePRTStrictNull() const {
513 return EnablePRTStrictNull;
514 }
515
518 }
519
520 /// \returns true if the target supports backing off of s_barrier instructions
521 /// when an exception is raised.
523 return BackOffBarrier;
524 }
525
528 }
529
532 }
533
534 bool hasUnalignedDSAccess() const {
535 return UnalignedDSAccess;
536 }
537
540 }
541
544 }
545
547 return UnalignedAccessMode;
548 }
549
550 bool hasApertureRegs() const {
551 return HasApertureRegs;
552 }
553
554 bool isTrapHandlerEnabled() const {
555 return TrapHandler;
556 }
557
558 bool isXNACKEnabled() const {
559 return TargetID.isXnackOnOrAny();
560 }
561
562 bool isTgSplitEnabled() const {
563 return EnableTgSplit;
564 }
565
566 bool isCuModeEnabled() const {
567 return EnableCuMode;
568 }
569
570 bool hasFlatAddressSpace() const {
571 return FlatAddressSpace;
572 }
573
574 bool hasFlatScrRegister() const {
575 return hasFlatAddressSpace();
576 }
577
578 bool hasFlatInstOffsets() const {
579 return FlatInstOffsets;
580 }
581
582 bool hasFlatGlobalInsts() const {
583 return FlatGlobalInsts;
584 }
585
586 bool hasFlatScratchInsts() const {
587 return FlatScratchInsts;
588 }
589
590 // Check if target supports ST addressing mode with FLAT scratch instructions.
591 // The ST addressing mode means no registers are used, either VGPR or SGPR,
592 // but only immediate offset is swizzled and added to the FLAT scratch base.
593 bool hasFlatScratchSTMode() const {
595 }
596
597 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
598
601 }
602
603 bool enableFlatScratch() const {
604 return flatScratchIsArchitected() ||
606 }
607
608 bool hasGlobalAddTidInsts() const {
609 return GFX10_BEncoding;
610 }
611
612 bool hasAtomicCSub() const {
613 return GFX10_BEncoding;
614 }
615
617 return getGeneration() >= GFX9;
618 }
619
622 }
623
625 return getGeneration() > GFX9;
626 }
627
628 bool hasD16LoadStore() const {
629 return getGeneration() >= GFX9;
630 }
631
634 }
635
636 bool hasD16Images() const {
638 }
639
640 /// Return if most LDS instructions have an m0 use that require m0 to be
641 /// initialized.
642 bool ldsRequiresM0Init() const {
643 return getGeneration() < GFX9;
644 }
645
646 // True if the hardware rewinds and replays GWS operations if a wave is
647 // preempted.
648 //
649 // If this is false, a GWS operation requires testing if a nack set the
650 // MEM_VIOL bit, and repeating if so.
651 bool hasGWSAutoReplay() const {
652 return getGeneration() >= GFX9;
653 }
654
655 /// \returns if target has ds_gws_sema_release_all instruction.
656 bool hasGWSSemaReleaseAll() const {
657 return CIInsts;
658 }
659
660 /// \returns true if the target has integer add/sub instructions that do not
661 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
662 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
663 /// for saturation.
664 bool hasAddNoCarry() const {
665 return AddNoCarryInsts;
666 }
667
668 bool hasUnpackedD16VMem() const {
669 return HasUnpackedD16VMem;
670 }
671
672 // Covers VS/PS/CS graphics shaders
673 bool isMesaGfxShader(const Function &F) const {
674 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
675 }
676
677 bool hasMad64_32() const {
678 return getGeneration() >= SEA_ISLANDS;
679 }
680
681 bool hasSDWAOmod() const {
682 return HasSDWAOmod;
683 }
684
685 bool hasSDWAScalar() const {
686 return HasSDWAScalar;
687 }
688
689 bool hasSDWASdst() const {
690 return HasSDWASdst;
691 }
692
693 bool hasSDWAMac() const {
694 return HasSDWAMac;
695 }
696
697 bool hasSDWAOutModsVOPC() const {
698 return HasSDWAOutModsVOPC;
699 }
700
701 bool hasDLInsts() const {
702 return HasDLInsts;
703 }
704
705 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
706
707 bool hasDot1Insts() const {
708 return HasDot1Insts;
709 }
710
711 bool hasDot2Insts() const {
712 return HasDot2Insts;
713 }
714
715 bool hasDot3Insts() const {
716 return HasDot3Insts;
717 }
718
719 bool hasDot4Insts() const {
720 return HasDot4Insts;
721 }
722
723 bool hasDot5Insts() const {
724 return HasDot5Insts;
725 }
726
727 bool hasDot6Insts() const {
728 return HasDot6Insts;
729 }
730
731 bool hasDot7Insts() const {
732 return HasDot7Insts;
733 }
734
735 bool hasDot8Insts() const {
736 return HasDot8Insts;
737 }
738
739 bool hasDot9Insts() const {
740 return HasDot9Insts;
741 }
742
743 bool hasDot10Insts() const {
744 return HasDot10Insts;
745 }
746
747 bool hasMAIInsts() const {
748 return HasMAIInsts;
749 }
750
751 bool hasFP8Insts() const {
752 return HasFP8Insts;
753 }
754
755 bool hasPkFmacF16Inst() const {
756 return HasPkFmacF16Inst;
757 }
758
759 bool hasAtomicFaddInsts() const {
761 }
762
764
766
768
770
771 bool hasNoSdstCMPX() const {
772 return HasNoSdstCMPX;
773 }
774
775 bool hasVscnt() const {
776 return HasVscnt;
777 }
778
779 bool hasGetWaveIdInst() const {
780 return HasGetWaveIdInst;
781 }
782
783 bool hasSMemTimeInst() const {
784 return HasSMemTimeInst;
785 }
786
789 }
790
791 bool hasVOP3Literal() const {
792 return HasVOP3Literal;
793 }
794
795 bool hasNoDataDepHazard() const {
796 return HasNoDataDepHazard;
797 }
798
800 return getGeneration() < SEA_ISLANDS;
801 }
802
803 bool hasInstPrefetch() const { return getGeneration() >= GFX10; }
804
805 // Scratch is allocated in 256 dword per wave blocks for the entire
806 // wavefront. When viewed from the perspective of an arbitrary workitem, this
807 // is 4-byte aligned.
808 //
809 // Only 4-byte alignment is really needed to access anything. Transformations
810 // on the pointer value itself may rely on the alignment / known low bits of
811 // the pointer. Set this to something above the minimum to avoid needing
812 // dynamic realignment in common cases.
813 Align getStackAlignment() const { return Align(16); }
814
815 bool enableMachineScheduler() const override {
816 return true;
817 }
818
819 bool useAA() const override;
820
821 bool enableSubRegLiveness() const override {
822 return true;
823 }
824
827
828 // static wrappers
829 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
830
831 // XXX - Why is this here if it isn't in the default pass set?
832 bool enableEarlyIfConversion() const override {
833 return true;
834 }
835
837 unsigned NumRegionInstrs) const override;
838
839 unsigned getMaxNumUserSGPRs() const {
840 return 16;
841 }
842
843 bool hasSMemRealTime() const {
844 return HasSMemRealTime;
845 }
846
847 bool hasMovrel() const {
848 return HasMovrel;
849 }
850
851 bool hasVGPRIndexMode() const {
852 return HasVGPRIndexMode;
853 }
854
855 bool useVGPRIndexMode() const;
856
857 bool hasScalarCompareEq64() const {
859 }
860
861 bool hasScalarStores() const {
862 return HasScalarStores;
863 }
864
865 bool hasScalarAtomics() const {
866 return HasScalarAtomics;
867 }
868
869 bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
870
871 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
872 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
873
874 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
875 bool hasPermLane64() const { return getGeneration() >= GFX11; }
876
877 bool hasDPP() const {
878 return HasDPP;
879 }
880
881 bool hasDPPBroadcasts() const {
882 return HasDPP && getGeneration() < GFX10;
883 }
884
886 return HasDPP && getGeneration() < GFX10;
887 }
888
889 bool hasDPP8() const {
890 return HasDPP8;
891 }
892
893 bool has64BitDPP() const {
894 return Has64BitDPP;
895 }
896
897 bool hasPackedFP32Ops() const {
898 return HasPackedFP32Ops;
899 }
900
902 return getGeneration() >= GFX10 || hasGFX940Insts();
903 }
904
905 bool hasImageInsts() const {
906 return HasImageInsts;
907 }
908
911 }
912
913 bool hasR128A16() const {
914 return HasR128A16;
915 }
916
917 bool hasA16() const { return HasA16; }
918
919 bool hasG16() const { return HasG16; }
920
921 bool hasOffset3fBug() const {
922 return HasOffset3fBug;
923 }
924
926
928
929 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
930
931 bool hasNSAEncoding() const { return HasNSAEncoding; }
932
933 unsigned getNSAMaxSize() const { return NSAMaxSize; }
934
935 bool hasGFX10_AEncoding() const {
936 return GFX10_AEncoding;
937 }
938
939 bool hasGFX10_BEncoding() const {
940 return GFX10_BEncoding;
941 }
942
943 bool hasGFX10_3Insts() const {
944 return GFX10_3Insts;
945 }
946
947 bool hasMadF16() const;
948
949 bool hasMovB64() const { return GFX940Insts; }
950
951 bool hasLshlAddB64() const { return GFX940Insts; }
952
953 bool enableSIScheduler() const {
954 return EnableSIScheduler;
955 }
956
957 bool loadStoreOptEnabled() const {
958 return EnableLoadStoreOpt;
959 }
960
961 bool hasSGPRInitBug() const {
962 return SGPRInitBug;
963 }
964
965 bool hasUserSGPRInit16Bug() const {
966 return UserSGPRInit16Bug && isWave32();
967 }
968
970
973 }
974
977 }
978
981 }
982
983 // \returns true if the subtarget supports DWORDX3 load/store instructions.
984 bool hasDwordx3LoadStores() const {
985 return CIInsts;
986 }
987
990 }
991
995 }
996
999 }
1000
1003 }
1004
1007 }
1008
1011 }
1012
1015 }
1016
1017 bool hasLDSMisalignedBug() const {
1018 return LDSMisalignedBug && !EnableCuMode;
1019 }
1020
1022 return HasInstFwdPrefetchBug;
1023 }
1024
1026 return HasVcmpxExecWARHazard;
1027 }
1028
1031 }
1032
1033 // Shift amount of a 64 bit shift cannot be a highest allocated register
1034 // if also at the end of the allocation block.
1036 return GFX90AInsts && !GFX940Insts;
1037 }
1038
1039 // Has one cycle hazard on transcendental instruction feeding a
1040 // non transcendental VALU.
1041 bool hasTransForwardingHazard() const { return GFX940Insts; }
1042
1043 // Has one cycle hazard on a VALU instruction partially writing dst with
1044 // a shift of result bits feeding another VALU instruction.
1046
1047 // Cannot use op_sel with v_dot instructions.
1048 bool hasDOTOpSelHazard() const { return GFX940Insts; }
1049
1050 // Does not have HW interlocs for VALU writing and then reading SGPRs.
1051 bool hasVDecCoExecHazard() const {
1052 return GFX940Insts;
1053 }
1054
1055 bool hasNSAtoVMEMBug() const {
1056 return HasNSAtoVMEMBug;
1057 }
1058
1059 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1060
1061 bool hasHardClauses() const { return getGeneration() >= GFX10; }
1062
1063 bool hasGFX90AInsts() const { return GFX90AInsts; }
1064
1066 return getGeneration() == GFX10;
1067 }
1068
1069 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1070
1071 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1072
1074 return getGeneration() >= GFX11;
1075 }
1076
1078
1079 bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; }
1080
1081 /// Return if operations acting on VGPR tuples require even alignment.
1082 bool needsAlignedVGPRs() const { return GFX90AInsts; }
1083
1084 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1085 bool hasSPackHL() const { return GFX11Insts; }
1086
1087 /// Return true if the target's EXP instruction has the COMPR flag, which
1088 /// affects the meaning of the EN (enable) bits.
1089 bool hasCompressedExport() const { return !GFX11Insts; }
1090
1091 /// Return true if the target's EXP instruction supports the NULL export
1092 /// target.
1093 bool hasNullExportTarget() const { return !GFX11Insts; }
1094
1095 bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; }
1096
1097 bool hasVOPDInsts() const { return HasVOPDInsts; }
1098
1100
1101 /// Return true if the target has the S_DELAY_ALU instruction.
1102 bool hasDelayAlu() const { return GFX11Insts; }
1103
1104 bool hasPackedTID() const { return HasPackedTID; }
1105
1106 // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
1107 // hasGFX90AInsts is also true.
1108 bool hasGFX940Insts() const { return GFX940Insts; }
1109
1110 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1111 /// SGPRs
1112 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1113
1114 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1115 /// VGPRs
1116 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1117
1118 /// Return occupancy for the given function. The LDS size and register counts
1119 /// are taken into account if provided.
1120 /// Note, occupancy can be affected by the scratch allocation as well, but
1121 /// we do not have enough information to compute it.
1122 unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1123 unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1124
1125 /// \returns true if the flat_scratch register should be initialized with the
1126 /// pointer to the wave's scratch memory rather than a size and offset.
1129 }
1130
1131 /// \returns true if the flat_scratch register is initialized by the HW.
1132 /// In this case it is readonly.
1134
1135 /// \returns true if the machine has merged shaders in which s0-s7 are
1136 /// reserved by the hardware and user SGPRs start at s8
1137 bool hasMergedShaders() const {
1138 return getGeneration() >= GFX9;
1139 }
1140
1141 // \returns true if the target supports the pre-NGG legacy geometry path.
1142 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1143
1144 /// \returns SGPR allocation granularity supported by the subtarget.
1145 unsigned getSGPRAllocGranule() const {
1147 }
1148
1149 /// \returns SGPR encoding granularity supported by the subtarget.
1150 unsigned getSGPREncodingGranule() const {
1152 }
1153
1154 /// \returns Total number of SGPRs supported by the subtarget.
1155 unsigned getTotalNumSGPRs() const {
1157 }
1158
1159 /// \returns Addressable number of SGPRs supported by the subtarget.
1160 unsigned getAddressableNumSGPRs() const {
1162 }
1163
1164 /// \returns Minimum number of SGPRs that meets the given number of waves per
1165 /// execution unit requirement supported by the subtarget.
1166 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1167 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1168 }
1169
1170 /// \returns Maximum number of SGPRs that meets the given number of waves per
1171 /// execution unit requirement supported by the subtarget.
1172 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1173 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1174 }
1175
1176 /// \returns Reserved number of SGPRs. This is common
1177 /// utility function called by MachineFunction and
1178 /// Function variants of getReservedNumSGPRs.
1179 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1180 /// \returns Reserved number of SGPRs for given machine function \p MF.
1181 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1182
1183 /// \returns Reserved number of SGPRs for given function \p F.
1184 unsigned getReservedNumSGPRs(const Function &F) const;
1185
1186 /// \returns max num SGPRs. This is the common utility
1187 /// function called by MachineFunction and Function
1188 /// variants of getMaxNumSGPRs.
1189 unsigned getBaseMaxNumSGPRs(const Function &F,
1190 std::pair<unsigned, unsigned> WavesPerEU,
1191 unsigned PreloadedSGPRs,
1192 unsigned ReservedNumSGPRs) const;
1193
1194 /// \returns Maximum number of SGPRs that meets number of waves per execution
1195 /// unit requirement for function \p MF, or number of SGPRs explicitly
1196 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1197 ///
1198 /// \returns Value that meets number of waves per execution unit requirement
1199 /// if explicitly requested value cannot be converted to integer, violates
1200 /// subtarget's specifications, or does not meet number of waves per execution
1201 /// unit requirement.
1202 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1203
1204 /// \returns Maximum number of SGPRs that meets number of waves per execution
1205 /// unit requirement for function \p F, or number of SGPRs explicitly
1206 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1207 ///
1208 /// \returns Value that meets number of waves per execution unit requirement
1209 /// if explicitly requested value cannot be converted to integer, violates
1210 /// subtarget's specifications, or does not meet number of waves per execution
1211 /// unit requirement.
1212 unsigned getMaxNumSGPRs(const Function &F) const;
1213
1214 /// \returns VGPR allocation granularity supported by the subtarget.
1215 unsigned getVGPRAllocGranule() const {
1217 }
1218
1219 /// \returns VGPR encoding granularity supported by the subtarget.
1220 unsigned getVGPREncodingGranule() const {
1222 }
1223
1224 /// \returns Total number of VGPRs supported by the subtarget.
1225 unsigned getTotalNumVGPRs() const {
1227 }
1228
1229 /// \returns Addressable number of VGPRs supported by the subtarget.
1230 unsigned getAddressableNumVGPRs() const {
1232 }
1233
1234 /// \returns the minimum number of VGPRs that will prevent achieving more than
1235 /// the specified number of waves \p WavesPerEU.
1236 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1237 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1238 }
1239
1240 /// \returns the maximum number of VGPRs that can be used and still achieved
1241 /// at least the specified number of waves \p WavesPerEU.
1242 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1243 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1244 }
1245
1246 /// \returns max num VGPRs. This is the common utility function
1247 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1248 unsigned getBaseMaxNumVGPRs(const Function &F,
1249 std::pair<unsigned, unsigned> WavesPerEU) const;
1250 /// \returns Maximum number of VGPRs that meets number of waves per execution
1251 /// unit requirement for function \p F, or number of VGPRs explicitly
1252 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1253 ///
1254 /// \returns Value that meets number of waves per execution unit requirement
1255 /// if explicitly requested value cannot be converted to integer, violates
1256 /// subtarget's specifications, or does not meet number of waves per execution
1257 /// unit requirement.
1258 unsigned getMaxNumVGPRs(const Function &F) const;
1259
1260 unsigned getMaxNumAGPRs(const Function &F) const {
1261 return getMaxNumVGPRs(F);
1262 }
1263
1264 /// \returns Maximum number of VGPRs that meets number of waves per execution
1265 /// unit requirement for function \p MF, or number of VGPRs explicitly
1266 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1267 ///
1268 /// \returns Value that meets number of waves per execution unit requirement
1269 /// if explicitly requested value cannot be converted to integer, violates
1270 /// subtarget's specifications, or does not meet number of waves per execution
1271 /// unit requirement.
1272 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1273
1274 void getPostRAMutations(
1275 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1276 const override;
1277
1278 std::unique_ptr<ScheduleDAGMutation>
1280
1281 bool isWave32() const {
1282 return getWavefrontSize() == 32;
1283 }
1284
1285 bool isWave64() const {
1286 return getWavefrontSize() == 64;
1287 }
1288
1290 return getRegisterInfo()->getBoolRC();
1291 }
1292
1293 /// \returns Maximum number of work groups per compute unit supported by the
1294 /// subtarget and limited by given \p FlatWorkGroupSize.
1295 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1296 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1297 }
1298
1299 /// \returns Minimum flat work group size supported by the subtarget.
1300 unsigned getMinFlatWorkGroupSize() const override {
1302 }
1303
1304 /// \returns Maximum flat work group size supported by the subtarget.
1305 unsigned getMaxFlatWorkGroupSize() const override {
1307 }
1308
1309 /// \returns Number of waves per execution unit required to support the given
1310 /// \p FlatWorkGroupSize.
1311 unsigned
1312 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1313 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1314 }
1315
1316 /// \returns Minimum number of waves per execution unit supported by the
1317 /// subtarget.
1318 unsigned getMinWavesPerEU() const override {
1320 }
1321
1322 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1323 SDep &Dep) const override;
1324
1325 // \returns true if it's beneficial on this subtarget for the scheduler to
1326 // cluster stores as well as loads.
1327 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1328
1329 /// \returns the number of address arguments from which to enable MIMG NSA
1330 /// on supported architectures.
1331 unsigned getNSAThreshold(const MachineFunction &MF) const;
1332};
1333
1334} // end namespace llvm
1335
1336#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
const char LLVMTargetMachineRef TM
return InstrInfo
unsigned UseOpIdx
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasFlat() const
Definition: GCNSubtarget.h:340
bool hasD16Images() const
Definition: GCNSubtarget.h:636
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:62
bool useVGPRIndexMode() const
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:681
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:192
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:441
bool hasSwap() const
Definition: GCNSubtarget.h:398
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:755
bool hasDot2Insts() const
Definition: GCNSubtarget.h:711
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:628
bool hasMergedShaders() const
bool hasA16() const
Definition: GCNSubtarget.h:917
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:685
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:522
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:857
int getLDSBankCount() const
Definition: GCNSubtarget.h:291
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:346
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:925
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:430
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:421
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:957
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:821
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:885
unsigned getSGPRAllocGranule() const
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:624
bool flatScratchIsPointer() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
bool hasSDWAMac() const
Definition: GCNSubtarget.h:693
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:731
bool hasApertureRegs() const
Definition: GCNSubtarget.h:550
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:64
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:425
bool hasFPAtomicToDenormModeHazard() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:578
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:799
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:779
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:825
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
bool hasDLInsts() const
Definition: GCNSubtarget.h:701
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:909
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:366
bool hasMAIInsts() const
Definition: GCNSubtarget.h:747
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:586
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:616
bool hasHWFP64() const
Definition: GCNSubtarget.h:320
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:480
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:975
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:783
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:538
bool hasNegativeScratchOffsetBug() const
Definition: GCNSubtarget.h:969
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:221
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:74
bool hasDot1Insts() const
Definition: GCNSubtarget.h:707
bool hasDot3Insts() const
Definition: GCNSubtarget.h:715
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
Definition: GCNSubtarget.h:929
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:241
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:516
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:763
unsigned getTotalNumSGPRs() const
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:266
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool needsAlignedVGPRs() const
Return true if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:943
Align getStackAlignment() const
Definition: GCNSubtarget.h:813
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:506
bool hasAtomicPkFaddNoRtnInsts() const
Definition: GCNSubtarget.h:767
bool enableFlatScratch() const
Definition: GCNSubtarget.h:603
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:526
bool hasR128A16() const
Definition: GCNSubtarget.h:913
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:921
bool hasDwordx3LoadStores() const
Definition: GCNSubtarget.h:984
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:608
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:961
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:574
bool hasPermLane64() const
Definition: GCNSubtarget.h:875
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:414
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:562
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:769
bool hasFP8Insts() const
Definition: GCNSubtarget.h:751
unsigned getMaxNumAGPRs(const Function &F) const
unsigned getVGPRAllocGranule() const
bool hasReadM0MovRelInterpHazard() const
Definition: GCNSubtarget.h:988
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:233
bool has64BitDPP() const
Definition: GCNSubtarget.h:893
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasDOTOpSelHazard() const
const TargetRegisterClass * getBoolRC() const
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:901
bool hasVscnt() const
Definition: GCNSubtarget.h:775
bool hasMad64_32() const
Definition: GCNSubtarget.h:677
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:245
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:113
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:490
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:632
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:705
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:803
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:673
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
Definition: GCNSubtarget.h:965
bool hasDPP() const
Definition: GCNSubtarget.h:877
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:253
bool hasLegacyGeometry() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:410
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:566
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:865
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:225
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:542
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:262
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:697
bool hasGFX11FullVGPRs() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:257
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:599
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
Definition: GCNSubtarget.h:979
bool hasLDSFPAtomicAdd() const
Definition: GCNSubtarget.h:869
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:466
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:795
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:534
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:382
bool hasIntClamp() const
Definition: GCNSubtarget.h:308
bool hasGFX10_AEncoding() const
Definition: GCNSubtarget.h:935
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:620
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:462
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:229
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:897
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:727
bool hasGFX940Insts() const
bool hasLshlAddB64() const
Definition: GCNSubtarget.h:951
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:332
bool hasScalarStores() const
Definition: GCNSubtarget.h:861
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:554
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:815
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:582
unsigned getNSAThreshold(const MachineFunction &MF) const
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:826
bool hasReadM0LdsDmaHazard() const
Definition: GCNSubtarget.h:997
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:287
bool hasSDWASdst() const
Definition: GCNSubtarget.h:689
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:402
bool hasFFBL() const
Definition: GCNSubtarget.h:370
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:931
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:843
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:570
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:881
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:512
bool hasMovB64() const
Definition: GCNSubtarget.h:949
bool hasInstFwdPrefetchBug() const
bool hasMed3_16() const
Definition: GCNSubtarget.h:378
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasMovrel() const
Definition: GCNSubtarget.h:847
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasFastFMAF32() const
Definition: GCNSubtarget.h:324
bool hasBFI() const
Definition: GCNSubtarget.h:358
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:642
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:189
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:851
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:530
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:295
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
Definition: GCNSubtarget.h:905
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:927
bool hasFMA() const
Definition: GCNSubtarget.h:394
bool hasDot10Insts() const
Definition: GCNSubtarget.h:743
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:475
bool hasNegativeUnalignedScratchOffsetBug() const
Definition: GCNSubtarget.h:971
bool hasFFBH() const
Definition: GCNSubtarget.h:374
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:597
bool supportsWGP() const
Definition: GCNSubtarget.h:306
bool hasG16() const
Definition: GCNSubtarget.h:919
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:328
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:759
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:176
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:316
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
Definition: GCNSubtarget.h:889
bool hasDot5Insts() const
Definition: GCNSubtarget.h:723
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:839
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:765
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:872
bool hasFlatScratchSVSSwizzleBug() const
bool hasVDecCoExecHazard() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:362
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:500
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
Definition: GCNSubtarget.h:992
bool hasDot8Insts() const
Definition: GCNSubtarget.h:735
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:406
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:249
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:495
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:484
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:939
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:207
Generation getGeneration() const
Definition: GCNSubtarget.h:272
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:791
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:771
unsigned getAddressableNumVGPRs() const
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:558
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:668
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:832
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:447
bool hasRFEHazards() const
Definition: GCNSubtarget.h:457
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:453
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:593
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:656
bool enableSIScheduler() const
Definition: GCNSubtarget.h:953
bool hasAddr64() const
Definition: GCNSubtarget.h:336
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:188
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:546
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:436
bool isWave64() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:386
bool hasCARRY() const
Definition: GCNSubtarget.h:390
bool hasPackedTID() const
unsigned getNSAMaxSize() const
Definition: GCNSubtarget.h:933
bool hasFP64() const
Definition: GCNSubtarget.h:312
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:664
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:787
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:91
bool hasFractBug() const
Definition: GCNSubtarget.h:350
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:276
bool hasDot4Insts() const
Definition: GCNSubtarget.h:719
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
bool flatScratchIsArchitected() const
~GCNSubtarget() override
bool HasAtomicPkFaddNoRtnInsts
Definition: GCNSubtarget.h:156
bool hasDot9Insts() const
Definition: GCNSubtarget.h:739
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:612
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:60
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:237
bool hasBFE() const
Definition: GCNSubtarget.h:354
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:651
Itinerary data supplied by a subtarget to be used by a target.
Provides the logic to select generic machine instructions.
Scheduling dependency.
Definition: ScheduleDAG.h:49
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
bool isShader(CallingConv::ID cc)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:245
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.