LLVM 17.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI selector; see the trap-handler-abi section of the
/// AMDGPU usage documentation.
enum class TrapHandlerAbi {
  NONE = 0,
  AMDHSA = 1,
};
44
45 enum class TrapID {
46 LLVMAMDHSATrap = 0x02,
48 };
49
50private:
51 /// GlobalISel related APIs.
52 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
53 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
54 std::unique_ptr<InstructionSelector> InstSelector;
55 std::unique_ptr<LegalizerInfo> Legalizer;
56 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
57
58protected:
59 // Basic subtarget description.
62 unsigned Gen = INVALID;
64 int LDSBankCount = 0;
66
67 // Possibly statically set by tablegen, but may want to be overridden.
68 bool FastFMAF32 = false;
69 bool FastDenormalF32 = false;
70 bool HalfRate64Ops = false;
71 bool FullRate64Ops = false;
72
73 // Dynamically set bits that enable features.
74 bool FlatForGlobal = false;
76 bool BackOffBarrier = false;
78 bool UnalignedAccessMode = false;
79 bool HasApertureRegs = false;
80 bool SupportsXNACK = false;
81
82 // This should not be used directly. 'TargetID' tracks the dynamic settings
83 // for XNACK.
84 bool EnableXNACK = false;
85
86 bool EnableTgSplit = false;
87 bool EnableCuMode = false;
88 bool TrapHandler = false;
89
90 // Used as options.
91 bool EnableLoadStoreOpt = false;
93 bool EnableSIScheduler = false;
94 bool EnableDS128 = false;
95 bool EnablePRTStrictNull = false;
96 bool DumpCode = false;
97
98 // Subtarget properties statically set by tablegen.
99 bool FP64 = false;
100 bool FMA = false;
101 bool MIMG_R128 = false;
102 bool CIInsts = false;
103 bool GFX8Insts = false;
104 bool GFX9Insts = false;
105 bool GFX90AInsts = false;
106 bool GFX940Insts = false;
107 bool GFX10Insts = false;
108 bool GFX11Insts = false;
109 bool GFX10_3Insts = false;
110 bool GFX7GFX8GFX9Insts = false;
111 bool SGPRInitBug = false;
112 bool UserSGPRInit16Bug = false;
115 bool HasSMemRealTime = false;
116 bool HasIntClamp = false;
117 bool HasFmaMixInsts = false;
118 bool HasMovrel = false;
119 bool HasVGPRIndexMode = false;
120 bool HasScalarStores = false;
121 bool HasScalarAtomics = false;
122 bool HasSDWAOmod = false;
123 bool HasSDWAScalar = false;
124 bool HasSDWASdst = false;
125 bool HasSDWAMac = false;
126 bool HasSDWAOutModsVOPC = false;
127 bool HasDPP = false;
128 bool HasDPP8 = false;
129 bool Has64BitDPP = false;
130 bool HasPackedFP32Ops = false;
131 bool HasImageInsts = false;
133 bool HasR128A16 = false;
134 bool HasA16 = false;
135 bool HasG16 = false;
136 bool HasNSAEncoding = false;
138 bool GFX10_AEncoding = false;
139 bool GFX10_BEncoding = false;
140 bool HasDLInsts = false;
141 bool HasFmacF64Inst = false;
142 bool HasDot1Insts = false;
143 bool HasDot2Insts = false;
144 bool HasDot3Insts = false;
145 bool HasDot4Insts = false;
146 bool HasDot5Insts = false;
147 bool HasDot6Insts = false;
148 bool HasDot7Insts = false;
149 bool HasDot8Insts = false;
150 bool HasDot9Insts = false;
151 bool HasDot10Insts = false;
152 bool HasMAIInsts = false;
153 bool HasFP8Insts = false;
154 bool HasPkFmacF16Inst = false;
163 bool SupportsSRAMECC = false;
164
165 // This should not be used directly. 'TargetID' tracks the dynamic settings
166 // for SRAMECC.
167 bool EnableSRAMECC = false;
168
169 bool HasNoSdstCMPX = false;
170 bool HasVscnt = false;
171 bool HasGetWaveIdInst = false;
172 bool HasSMemTimeInst = false;
174 bool HasVOP3Literal = false;
175 bool HasNoDataDepHazard = false;
176 bool FlatAddressSpace = false;
177 bool FlatInstOffsets = false;
178 bool FlatGlobalInsts = false;
179 bool FlatScratchInsts = false;
182 bool EnableFlatScratch = false;
184 bool AddNoCarryInsts = false;
185 bool HasUnpackedD16VMem = false;
186 bool LDSMisalignedBug = false;
189 bool UnalignedDSAccess = false;
190 bool HasPackedTID = false;
191 bool ScalarizeGlobal = false;
192
199 bool HasNSAtoVMEMBug = false;
200 bool HasNSAClauseBug = false;
201 bool HasOffset3fBug = false;
205 bool HasGFX11FullVGPRs = false;
206 bool HasMADIntraFwdBug = false;
207 bool HasVOPDInsts = false;
210
211 // Dummy feature to use for assembler in tablegen.
212 bool FeatureDisable = false;
213
215private:
216 SIInstrInfo InstrInfo;
217 SITargetLowering TLInfo;
218 SIFrameLowering FrameLowering;
219
220public:
221 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
222 const GCNTargetMachine &TM);
223 ~GCNSubtarget() override;
224
226 StringRef GPU, StringRef FS);
227
228 const SIInstrInfo *getInstrInfo() const override {
229 return &InstrInfo;
230 }
231
232 const SIFrameLowering *getFrameLowering() const override {
233 return &FrameLowering;
234 }
235
236 const SITargetLowering *getTargetLowering() const override {
237 return &TLInfo;
238 }
239
240 const SIRegisterInfo *getRegisterInfo() const override {
241 return &InstrInfo.getRegisterInfo();
242 }
243
244 const CallLowering *getCallLowering() const override {
245 return CallLoweringInfo.get();
246 }
247
248 const InlineAsmLowering *getInlineAsmLowering() const override {
249 return InlineAsmLoweringInfo.get();
250 }
251
253 return InstSelector.get();
254 }
255
256 const LegalizerInfo *getLegalizerInfo() const override {
257 return Legalizer.get();
258 }
259
260 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
261 return RegBankInfo.get();
262 }
263
265 return TargetID;
266 }
267
268 // Nothing implemented, just prevent crashes on use.
270 return &TSInfo;
271 }
272
274 return &InstrItins;
275 }
276
278
280 return (Generation)Gen;
281 }
282
283 unsigned getMaxWaveScratchSize() const {
284 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
285 if (getGeneration() < GFX11) {
286 // 13-bit field in units of 256-dword.
287 return (256 * 4) * ((1 << 13) - 1);
288 }
289 // 15-bit field in units of 64-dword.
290 return (64 * 4) * ((1 << 15) - 1);
291 }
292
293 /// Return the number of high bits known to be zero for a frame index.
296 }
297
298 int getLDSBankCount() const {
299 return LDSBankCount;
300 }
301
302 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
303 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
304 }
305
306 unsigned getConstantBusLimit(unsigned Opcode) const;
307
308 /// Returns if the result of this instruction with a 16-bit result returned in
309 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
310 /// the original value.
311 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
312
313 bool supportsWGP() const { return getGeneration() >= GFX10; }
314
315 bool hasIntClamp() const {
316 return HasIntClamp;
317 }
318
319 bool hasFP64() const {
320 return FP64;
321 }
322
323 bool hasMIMG_R128() const {
324 return MIMG_R128;
325 }
326
327 bool hasHWFP64() const {
328 return FP64;
329 }
330
331 bool hasFastFMAF32() const {
332 return FastFMAF32;
333 }
334
335 bool hasHalfRate64Ops() const {
336 return HalfRate64Ops;
337 }
338
339 bool hasFullRate64Ops() const {
340 return FullRate64Ops;
341 }
342
343 bool hasAddr64() const {
345 }
346
347 bool hasFlat() const {
349 }
350
351 // Return true if the target only has the reverse operand versions of VALU
352 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
353 bool hasOnlyRevVALUShifts() const {
355 }
356
357 bool hasFractBug() const {
359 }
360
361 bool hasBFE() const {
362 return true;
363 }
364
365 bool hasBFI() const {
366 return true;
367 }
368
369 bool hasBFM() const {
370 return hasBFE();
371 }
372
373 bool hasBCNT(unsigned Size) const {
374 return true;
375 }
376
377 bool hasFFBL() const {
378 return true;
379 }
380
381 bool hasFFBH() const {
382 return true;
383 }
384
385 bool hasMed3_16() const {
387 }
388
389 bool hasMin3Max3_16() const {
391 }
392
393 bool hasFmaMixInsts() const {
394 return HasFmaMixInsts;
395 }
396
397 bool hasCARRY() const {
398 return true;
399 }
400
401 bool hasFMA() const {
402 return FMA;
403 }
404
405 bool hasSwap() const {
406 return GFX9Insts;
407 }
408
409 bool hasScalarPackInsts() const {
410 return GFX9Insts;
411 }
412
413 bool hasScalarMulHiInsts() const {
414 return GFX9Insts;
415 }
416
419 }
420
422 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
423 return getGeneration() >= GFX9;
424 }
425
426 /// True if the offset field of DS instructions works as expected. On SI, the
427 /// offset uses a 16-bit adder and does not always wrap properly.
428 bool hasUsableDSOffset() const {
429 return getGeneration() >= SEA_ISLANDS;
430 }
431
434 }
435
436 /// Condition output from div_scale is usable.
439 }
440
441 /// Extra wait hazard is needed in some cases before
442 /// s_cbranch_vccnz/s_cbranch_vccz.
443 bool hasReadVCCZBug() const {
444 return getGeneration() <= SEA_ISLANDS;
445 }
446
447 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
449 return getGeneration() >= GFX10;
450 }
451
452 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
453 /// was written by a VALU instruction.
456 }
457
458 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
459 /// SGPR was written by a VALU Instruction.
462 }
463
464 bool hasRFEHazards() const {
466 }
467
468 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
469 unsigned getSetRegWaitStates() const {
470 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
471 }
472
473 bool dumpCode() const {
474 return DumpCode;
475 }
476
477 /// Return the amount of LDS that can be used that will not restrict the
478 /// occupancy lower than WaveCount.
479 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
480 const Function &) const;
481
484 }
485
486 /// \returns If target supports S_DENORM_MODE.
487 bool hasDenormModeInst() const {
489 }
490
491 bool useFlatForGlobal() const {
492 return FlatForGlobal;
493 }
494
495 /// \returns If target supports ds_read/write_b128 and user enables generation
496 /// of ds_read/write_b128.
497 bool useDS128() const {
498 return CIInsts && EnableDS128;
499 }
500
501 /// \return If target supports ds_read/write_b96/128.
502 bool hasDS96AndDS128() const {
503 return CIInsts;
504 }
505
506 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
507 bool haveRoundOpsF64() const {
508 return CIInsts;
509 }
510
511 /// \returns If MUBUF instructions always perform range checking, even for
512 /// buffer resources used for private memory access.
515 }
516
517 /// \returns If target requires PRT Struct NULL support (zero result registers
518 /// for sparse texture support).
519 bool usePRTStrictNull() const {
520 return EnablePRTStrictNull;
521 }
522
525 }
526
527 /// \returns true if the target supports backing off of s_barrier instructions
528 /// when an exception is raised.
530 return BackOffBarrier;
531 }
532
535 }
536
539 }
540
541 bool hasUnalignedDSAccess() const {
542 return UnalignedDSAccess;
543 }
544
547 }
548
551 }
552
554 return UnalignedAccessMode;
555 }
556
557 bool hasApertureRegs() const {
558 return HasApertureRegs;
559 }
560
561 bool isTrapHandlerEnabled() const {
562 return TrapHandler;
563 }
564
565 bool isXNACKEnabled() const {
566 return TargetID.isXnackOnOrAny();
567 }
568
569 bool isTgSplitEnabled() const {
570 return EnableTgSplit;
571 }
572
573 bool isCuModeEnabled() const {
574 return EnableCuMode;
575 }
576
577 bool hasFlatAddressSpace() const {
578 return FlatAddressSpace;
579 }
580
581 bool hasFlatScrRegister() const {
582 return hasFlatAddressSpace();
583 }
584
585 bool hasFlatInstOffsets() const {
586 return FlatInstOffsets;
587 }
588
589 bool hasFlatGlobalInsts() const {
590 return FlatGlobalInsts;
591 }
592
593 bool hasFlatScratchInsts() const {
594 return FlatScratchInsts;
595 }
596
597 // Check if target supports ST addressing mode with FLAT scratch instructions.
598 // The ST addressing mode means no registers are used, either VGPR or SGPR,
599 // but only immediate offset is swizzled and added to the FLAT scratch base.
600 bool hasFlatScratchSTMode() const {
602 }
603
604 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
605
608 }
609
610 bool enableFlatScratch() const {
611 return flatScratchIsArchitected() ||
613 }
614
615 bool hasGlobalAddTidInsts() const {
616 return GFX10_BEncoding;
617 }
618
619 bool hasAtomicCSub() const {
620 return GFX10_BEncoding;
621 }
622
624 return getGeneration() >= GFX9;
625 }
626
629 }
630
632 return getGeneration() > GFX9;
633 }
634
635 bool hasD16LoadStore() const {
636 return getGeneration() >= GFX9;
637 }
638
641 }
642
643 bool hasD16Images() const {
645 }
646
647 /// Return if most LDS instructions have an m0 use that require m0 to be
648 /// initialized.
649 bool ldsRequiresM0Init() const {
650 return getGeneration() < GFX9;
651 }
652
653 // True if the hardware rewinds and replays GWS operations if a wave is
654 // preempted.
655 //
656 // If this is false, a GWS operation requires testing if a nack set the
657 // MEM_VIOL bit, and repeating if so.
658 bool hasGWSAutoReplay() const {
659 return getGeneration() >= GFX9;
660 }
661
662 /// \returns if target has ds_gws_sema_release_all instruction.
663 bool hasGWSSemaReleaseAll() const {
664 return CIInsts;
665 }
666
667 /// \returns true if the target has integer add/sub instructions that do not
668 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
669 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
670 /// for saturation.
671 bool hasAddNoCarry() const {
672 return AddNoCarryInsts;
673 }
674
675 bool hasUnpackedD16VMem() const {
676 return HasUnpackedD16VMem;
677 }
678
679 // Covers VS/PS/CS graphics shaders
680 bool isMesaGfxShader(const Function &F) const {
681 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
682 }
683
684 bool hasMad64_32() const {
685 return getGeneration() >= SEA_ISLANDS;
686 }
687
688 bool hasSDWAOmod() const {
689 return HasSDWAOmod;
690 }
691
692 bool hasSDWAScalar() const {
693 return HasSDWAScalar;
694 }
695
696 bool hasSDWASdst() const {
697 return HasSDWASdst;
698 }
699
700 bool hasSDWAMac() const {
701 return HasSDWAMac;
702 }
703
704 bool hasSDWAOutModsVOPC() const {
705 return HasSDWAOutModsVOPC;
706 }
707
708 bool hasDLInsts() const {
709 return HasDLInsts;
710 }
711
712 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
713
714 bool hasDot1Insts() const {
715 return HasDot1Insts;
716 }
717
718 bool hasDot2Insts() const {
719 return HasDot2Insts;
720 }
721
722 bool hasDot3Insts() const {
723 return HasDot3Insts;
724 }
725
726 bool hasDot4Insts() const {
727 return HasDot4Insts;
728 }
729
730 bool hasDot5Insts() const {
731 return HasDot5Insts;
732 }
733
734 bool hasDot6Insts() const {
735 return HasDot6Insts;
736 }
737
738 bool hasDot7Insts() const {
739 return HasDot7Insts;
740 }
741
742 bool hasDot8Insts() const {
743 return HasDot8Insts;
744 }
745
746 bool hasDot9Insts() const {
747 return HasDot9Insts;
748 }
749
750 bool hasDot10Insts() const {
751 return HasDot10Insts;
752 }
753
754 bool hasMAIInsts() const {
755 return HasMAIInsts;
756 }
757
758 bool hasFP8Insts() const {
759 return HasFP8Insts;
760 }
761
762 bool hasPkFmacF16Inst() const {
763 return HasPkFmacF16Inst;
764 }
765
767
769
770 bool hasAtomicFaddInsts() const {
772 }
773
775
777
780 }
781
784 }
785
788 }
789
791
792 bool hasNoSdstCMPX() const {
793 return HasNoSdstCMPX;
794 }
795
796 bool hasVscnt() const {
797 return HasVscnt;
798 }
799
800 bool hasGetWaveIdInst() const {
801 return HasGetWaveIdInst;
802 }
803
804 bool hasSMemTimeInst() const {
805 return HasSMemTimeInst;
806 }
807
810 }
811
812 bool hasVOP3Literal() const {
813 return HasVOP3Literal;
814 }
815
816 bool hasNoDataDepHazard() const {
817 return HasNoDataDepHazard;
818 }
819
821 return getGeneration() < SEA_ISLANDS;
822 }
823
824 bool hasInstPrefetch() const { return getGeneration() >= GFX10; }
825
826 // Scratch is allocated in 256 dword per wave blocks for the entire
827 // wavefront. When viewed from the perspective of an arbitrary workitem, this
828 // is 4-byte aligned.
829 //
830 // Only 4-byte alignment is really needed to access anything. Transformations
831 // on the pointer value itself may rely on the alignment / known low bits of
832 // the pointer. Set this to something above the minimum to avoid needing
833 // dynamic realignment in common cases.
834 Align getStackAlignment() const { return Align(16); }
835
836 bool enableMachineScheduler() const override {
837 return true;
838 }
839
840 bool useAA() const override;
841
842 bool enableSubRegLiveness() const override {
843 return true;
844 }
845
848
849 // static wrappers
850 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
851
852 // XXX - Why is this here if it isn't in the default pass set?
853 bool enableEarlyIfConversion() const override {
854 return true;
855 }
856
858 unsigned NumRegionInstrs) const override;
859
860 unsigned getMaxNumUserSGPRs() const {
861 return 16;
862 }
863
864 bool hasSMemRealTime() const {
865 return HasSMemRealTime;
866 }
867
868 bool hasMovrel() const {
869 return HasMovrel;
870 }
871
872 bool hasVGPRIndexMode() const {
873 return HasVGPRIndexMode;
874 }
875
876 bool useVGPRIndexMode() const;
877
878 bool hasScalarCompareEq64() const {
880 }
881
882 bool hasScalarStores() const {
883 return HasScalarStores;
884 }
885
886 bool hasScalarAtomics() const {
887 return HasScalarAtomics;
888 }
889
890 bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
891
892 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
893 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
894
895 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
896 bool hasPermLane64() const { return getGeneration() >= GFX11; }
897
898 bool hasDPP() const {
899 return HasDPP;
900 }
901
902 bool hasDPPBroadcasts() const {
903 return HasDPP && getGeneration() < GFX10;
904 }
905
907 return HasDPP && getGeneration() < GFX10;
908 }
909
910 bool hasDPP8() const {
911 return HasDPP8;
912 }
913
914 bool has64BitDPP() const {
915 return Has64BitDPP;
916 }
917
918 bool hasPackedFP32Ops() const {
919 return HasPackedFP32Ops;
920 }
921
923 return getGeneration() >= GFX10 || hasGFX940Insts();
924 }
925
926 bool hasImageInsts() const {
927 return HasImageInsts;
928 }
929
932 }
933
934 bool hasR128A16() const {
935 return HasR128A16;
936 }
937
938 bool hasA16() const { return HasA16; }
939
940 bool hasG16() const { return HasG16; }
941
942 bool hasOffset3fBug() const {
943 return HasOffset3fBug;
944 }
945
947
949
950 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
951
952 bool hasNSAEncoding() const { return HasNSAEncoding; }
953
955
956 unsigned getNSAMaxSize() const { return AMDGPU::getNSAMaxSize(*this); }
957
958 bool hasGFX10_AEncoding() const {
959 return GFX10_AEncoding;
960 }
961
962 bool hasGFX10_BEncoding() const {
963 return GFX10_BEncoding;
964 }
965
966 bool hasGFX10_3Insts() const {
967 return GFX10_3Insts;
968 }
969
970 bool hasMadF16() const;
971
972 bool hasMovB64() const { return GFX940Insts; }
973
974 bool hasLshlAddB64() const { return GFX940Insts; }
975
976 bool enableSIScheduler() const {
977 return EnableSIScheduler;
978 }
979
980 bool loadStoreOptEnabled() const {
981 return EnableLoadStoreOpt;
982 }
983
984 bool hasSGPRInitBug() const {
985 return SGPRInitBug;
986 }
987
988 bool hasUserSGPRInit16Bug() const {
989 return UserSGPRInit16Bug && isWave32();
990 }
991
993
996 }
997
1000 }
1001
1004 }
1005
1006 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1008 return CIInsts;
1009 }
1010
1013 }
1014
1018 }
1019
1022 }
1023
1026 }
1027
1030 }
1031
1034 }
1035
1038 }
1039
1040 bool hasLDSMisalignedBug() const {
1041 return LDSMisalignedBug && !EnableCuMode;
1042 }
1043
1045 return HasInstFwdPrefetchBug;
1046 }
1047
1049 return HasVcmpxExecWARHazard;
1050 }
1051
1054 }
1055
1056 // Shift amount of a 64 bit shift cannot be a highest allocated register
1057 // if also at the end of the allocation block.
1059 return GFX90AInsts && !GFX940Insts;
1060 }
1061
1062 // Has one cycle hazard on transcendental instruction feeding a
1063 // non transcendental VALU.
1064 bool hasTransForwardingHazard() const { return GFX940Insts; }
1065
1066 // Has one cycle hazard on a VALU instruction partially writing dst with
1067 // a shift of result bits feeding another VALU instruction.
1069
1070 // Cannot use op_sel with v_dot instructions.
1071 bool hasDOTOpSelHazard() const { return GFX940Insts; }
1072
1073 // Does not have HW interlocs for VALU writing and then reading SGPRs.
1074 bool hasVDecCoExecHazard() const {
1075 return GFX940Insts;
1076 }
1077
1078 bool hasNSAtoVMEMBug() const {
1079 return HasNSAtoVMEMBug;
1080 }
1081
1082 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1083
1084 bool hasHardClauses() const { return getGeneration() >= GFX10; }
1085
1086 bool hasGFX90AInsts() const { return GFX90AInsts; }
1087
1089 return getGeneration() == GFX10;
1090 }
1091
1092 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1093
1094 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1095
1097 return getGeneration() >= GFX11;
1098 }
1099
1101
1103
1104 bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; }
1105
1106 /// Return if operations acting on VGPR tuples require even alignment.
1107 bool needsAlignedVGPRs() const { return GFX90AInsts; }
1108
1109 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1110 bool hasSPackHL() const { return GFX11Insts; }
1111
1112 /// Return true if the target's EXP instruction has the COMPR flag, which
1113 /// affects the meaning of the EN (enable) bits.
1114 bool hasCompressedExport() const { return !GFX11Insts; }
1115
1116 /// Return true if the target's EXP instruction supports the NULL export
1117 /// target.
1118 bool hasNullExportTarget() const { return !GFX11Insts; }
1119
1120 bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; }
1121
1122 bool hasVOPDInsts() const { return HasVOPDInsts; }
1123
1125
1126 /// Return true if the target has the S_DELAY_ALU instruction.
1127 bool hasDelayAlu() const { return GFX11Insts; }
1128
1129 bool hasPackedTID() const { return HasPackedTID; }
1130
1131 // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
1132 // hasGFX90AInsts is also true.
1133 bool hasGFX940Insts() const { return GFX940Insts; }
1134
1135 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1136 /// SGPRs
1137 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1138
1139 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1140 /// VGPRs
1141 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1142
1143 /// Return occupancy for the given function. Used LDS and a number of
1144 /// registers if provided.
1145 /// Note, occupancy can be affected by the scratch allocation as well, but
1146 /// we do not have enough information to compute it.
1147 unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1148 unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1149
1150 /// \returns true if the flat_scratch register should be initialized with the
1151 /// pointer to the wave's scratch memory rather than a size and offset.
1154 }
1155
1156 /// \returns true if the flat_scratch register is initialized by the HW.
1157 /// In this case it is readonly.
1159
1160 /// \returns true if the architected SGPRs are enabled.
1162
1163 /// \returns true if the machine has merged shaders in which s0-s7 are
1164 /// reserved by the hardware and user SGPRs start at s8
1165 bool hasMergedShaders() const {
1166 return getGeneration() >= GFX9;
1167 }
1168
1169 // \returns true if the target supports the pre-NGG legacy geometry path.
1170 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1171
1172 /// \returns SGPR allocation granularity supported by the subtarget.
1173 unsigned getSGPRAllocGranule() const {
1175 }
1176
1177 /// \returns SGPR encoding granularity supported by the subtarget.
1178 unsigned getSGPREncodingGranule() const {
1180 }
1181
1182 /// \returns Total number of SGPRs supported by the subtarget.
1183 unsigned getTotalNumSGPRs() const {
1185 }
1186
1187 /// \returns Addressable number of SGPRs supported by the subtarget.
1188 unsigned getAddressableNumSGPRs() const {
1190 }
1191
1192 /// \returns Minimum number of SGPRs that meets the given number of waves per
1193 /// execution unit requirement supported by the subtarget.
1194 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1195 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1196 }
1197
1198 /// \returns Maximum number of SGPRs that meets the given number of waves per
1199 /// execution unit requirement supported by the subtarget.
1200 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1201 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1202 }
1203
1204 /// \returns Reserved number of SGPRs. This is common
1205 /// utility function called by MachineFunction and
1206 /// Function variants of getReservedNumSGPRs.
1207 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1208 /// \returns Reserved number of SGPRs for given machine function \p MF.
1209 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1210
1211 /// \returns Reserved number of SGPRs for given function \p F.
1212 unsigned getReservedNumSGPRs(const Function &F) const;
1213
1214 /// \returns max num SGPRs. This is the common utility
1215 /// function called by MachineFunction and Function
1216 /// variants of getMaxNumSGPRs.
1217 unsigned getBaseMaxNumSGPRs(const Function &F,
1218 std::pair<unsigned, unsigned> WavesPerEU,
1219 unsigned PreloadedSGPRs,
1220 unsigned ReservedNumSGPRs) const;
1221
1222 /// \returns Maximum number of SGPRs that meets number of waves per execution
1223 /// unit requirement for function \p MF, or number of SGPRs explicitly
1224 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1225 ///
1226 /// \returns Value that meets number of waves per execution unit requirement
1227 /// if explicitly requested value cannot be converted to integer, violates
1228 /// subtarget's specifications, or does not meet number of waves per execution
1229 /// unit requirement.
1230 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1231
1232 /// \returns Maximum number of SGPRs that meets number of waves per execution
1233 /// unit requirement for function \p F, or number of SGPRs explicitly
1234 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1235 ///
1236 /// \returns Value that meets number of waves per execution unit requirement
1237 /// if explicitly requested value cannot be converted to integer, violates
1238 /// subtarget's specifications, or does not meet number of waves per execution
1239 /// unit requirement.
1240 unsigned getMaxNumSGPRs(const Function &F) const;
1241
1242 /// \returns VGPR allocation granularity supported by the subtarget.
1243 unsigned getVGPRAllocGranule() const {
1245 }
1246
1247 /// \returns VGPR encoding granularity supported by the subtarget.
1248 unsigned getVGPREncodingGranule() const {
1250 }
1251
1252 /// \returns Total number of VGPRs supported by the subtarget.
1253 unsigned getTotalNumVGPRs() const {
1255 }
1256
1257 /// \returns Addressable number of VGPRs supported by the subtarget.
1258 unsigned getAddressableNumVGPRs() const {
1260 }
1261
1262 /// \returns the minimum number of VGPRs that will prevent achieving more than
1263 /// the specified number of waves \p WavesPerEU.
1264 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1265 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1266 }
1267
1268 /// \returns the maximum number of VGPRs that can be used and still achieved
1269 /// at least the specified number of waves \p WavesPerEU.
1270 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1271 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1272 }
1273
1274 /// \returns max num VGPRs. This is the common utility function
1275 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1276 unsigned getBaseMaxNumVGPRs(const Function &F,
1277 std::pair<unsigned, unsigned> WavesPerEU) const;
1278 /// \returns Maximum number of VGPRs that meets number of waves per execution
1279 /// unit requirement for function \p F, or number of VGPRs explicitly
1280 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1281 ///
1282 /// \returns Value that meets number of waves per execution unit requirement
1283 /// if explicitly requested value cannot be converted to integer, violates
1284 /// subtarget's specifications, or does not meet number of waves per execution
1285 /// unit requirement.
1286 unsigned getMaxNumVGPRs(const Function &F) const;
1287
1288 unsigned getMaxNumAGPRs(const Function &F) const {
1289 return getMaxNumVGPRs(F);
1290 }
1291
1292 /// \returns Maximum number of VGPRs that meets number of waves per execution
1293 /// unit requirement for function \p MF, or number of VGPRs explicitly
1294 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1295 ///
1296 /// \returns Value that meets number of waves per execution unit requirement
1297 /// if explicitly requested value cannot be converted to integer, violates
1298 /// subtarget's specifications, or does not meet number of waves per execution
1299 /// unit requirement.
1300 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1301
1302 void getPostRAMutations(
1303 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1304 const override;
1305
1306 std::unique_ptr<ScheduleDAGMutation>
1308
1309 bool isWave32() const {
1310 return getWavefrontSize() == 32;
1311 }
1312
1313 bool isWave64() const {
1314 return getWavefrontSize() == 64;
1315 }
1316
1318 return getRegisterInfo()->getBoolRC();
1319 }
1320
1321 /// \returns Maximum number of work groups per compute unit supported by the
1322 /// subtarget and limited by given \p FlatWorkGroupSize.
1323 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1324 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1325 }
1326
1327 /// \returns Minimum flat work group size supported by the subtarget.
1328 unsigned getMinFlatWorkGroupSize() const override {
1330 }
1331
1332 /// \returns Maximum flat work group size supported by the subtarget.
1333 unsigned getMaxFlatWorkGroupSize() const override {
1335 }
1336
1337 /// \returns Number of waves per execution unit required to support the given
1338 /// \p FlatWorkGroupSize.
1339 unsigned
1340 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1341 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1342 }
1343
1344 /// \returns Minimum number of waves per execution unit supported by the
1345 /// subtarget.
1346 unsigned getMinWavesPerEU() const override {
1348 }
1349
1350 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1351 SDep &Dep) const override;
1352
1353 // \returns true if it's beneficial on this subtarget for the scheduler to
1354 // cluster stores as well as loads.
1355 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1356
1357 // \returns the number of address arguments from which to enable MIMG NSA
1358 // on supported architectures.
1359 unsigned getNSAThreshold(const MachineFunction &MF) const;
1360};
1361
1362} // end namespace llvm
1363
1364#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
const char LLVMTargetMachineRef TM
return InstrInfo
unsigned UseOpIdx
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasFlat() const
Definition: GCNSubtarget.h:347
bool hasD16Images() const
Definition: GCNSubtarget.h:643
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:63
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:766
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:688
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:198
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:448
bool hasSwap() const
Definition: GCNSubtarget.h:405
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:762
bool hasDot2Insts() const
Definition: GCNSubtarget.h:718
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:635
bool hasMergedShaders() const
bool hasA16() const
Definition: GCNSubtarget.h:938
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:692
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:529
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:878
int getLDSBankCount() const
Definition: GCNSubtarget.h:298
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:353
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:946
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:437
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:428
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:980
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:842
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:906
unsigned getSGPRAllocGranule() const
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:631
bool flatScratchIsPointer() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
bool hasSDWAMac() const
Definition: GCNSubtarget.h:700
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:738
bool hasApertureRegs() const
Definition: GCNSubtarget.h:557
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:65
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:432
bool hasFPAtomicToDenormModeHazard() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:585
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:820
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:800
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:846
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
bool hasDLInsts() const
Definition: GCNSubtarget.h:708
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:930
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:373
bool hasMAIInsts() const
Definition: GCNSubtarget.h:754
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:593
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:623
bool hasArchitectedSGPRs() const
bool hasHWFP64() const
Definition: GCNSubtarget.h:327
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:487
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:998
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:804
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:545
bool hasNegativeScratchOffsetBug() const
Definition: GCNSubtarget.h:992
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:228
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:75
bool hasDot1Insts() const
Definition: GCNSubtarget.h:714
bool hasDot3Insts() const
Definition: GCNSubtarget.h:722
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
Definition: GCNSubtarget.h:950
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:248
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:523
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:774
unsigned getTotalNumSGPRs() const
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:273
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:966
Align getStackAlignment() const
Definition: GCNSubtarget.h:834
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:513
bool enableFlatScratch() const
Definition: GCNSubtarget.h:610
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:533
bool hasR128A16() const
Definition: GCNSubtarget.h:934
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:942
bool hasDwordx3LoadStores() const
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:615
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:984
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:581
bool hasPermLane64() const
Definition: GCNSubtarget.h:896
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:421
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:569
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:790
bool hasFP8Insts() const
Definition: GCNSubtarget.h:758
unsigned getMaxNumAGPRs(const Function &F) const
unsigned getVGPRAllocGranule() const
bool hasReadM0MovRelInterpHazard() const
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:240
bool has64BitDPP() const
Definition: GCNSubtarget.h:914
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasDOTOpSelHazard() const
const TargetRegisterClass * getBoolRC() const
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:922
bool hasVscnt() const
Definition: GCNSubtarget.h:796
bool hasMad64_32() const
Definition: GCNSubtarget.h:684
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:252
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:114
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:497
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:639
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:712
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:824
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:680
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
Definition: GCNSubtarget.h:988
bool hasDPP() const
Definition: GCNSubtarget.h:898
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:260
bool hasLegacyGeometry() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:417
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:573
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:886
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:232
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:549
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:269
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:704
bool hasGFX11FullVGPRs() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:264
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:606
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasLDSFPAtomicAdd() const
Definition: GCNSubtarget.h:890
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:473
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:816
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:541
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:389
bool hasIntClamp() const
Definition: GCNSubtarget.h:315
bool hasGFX10_AEncoding() const
Definition: GCNSubtarget.h:958
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:627
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:469
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:236
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:918
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:734
bool hasGFX940Insts() const
bool hasLshlAddB64() const
Definition: GCNSubtarget.h:974
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:339
bool hasScalarStores() const
Definition: GCNSubtarget.h:882
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:561
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:836
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:156
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:589
unsigned getNSAThreshold(const MachineFunction &MF) const
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:847
bool hasReadM0LdsDmaHazard() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:294
bool hasSDWASdst() const
Definition: GCNSubtarget.h:696
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:409
bool hasFFBL() const
Definition: GCNSubtarget.h:377
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:952
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:864
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:577
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:902
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:519
bool hasMovB64() const
Definition: GCNSubtarget.h:972
bool hasInstFwdPrefetchBug() const
bool hasMed3_16() const
Definition: GCNSubtarget.h:385
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasMovrel() const
Definition: GCNSubtarget.h:868
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasFastFMAF32() const
Definition: GCNSubtarget.h:331
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:768
bool hasBFI() const
Definition: GCNSubtarget.h:365
bool ldsRequiresM0Init() const
Return whether most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:649
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:195
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:872
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:160
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:537
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:302
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
Definition: GCNSubtarget.h:926
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:948
bool hasFMA() const
Definition: GCNSubtarget.h:401
bool hasDot10Insts() const
Definition: GCNSubtarget.h:750
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:482
bool hasNegativeUnalignedScratchOffsetBug() const
Definition: GCNSubtarget.h:994
bool hasFFBH() const
Definition: GCNSubtarget.h:381
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:604
bool supportsWGP() const
Definition: GCNSubtarget.h:313
bool hasG16() const
Definition: GCNSubtarget.h:940
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:335
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:770
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:159
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:181
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:778
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:323
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
Definition: GCNSubtarget.h:910
bool hasDot5Insts() const
Definition: GCNSubtarget.h:730
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:860
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:776
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:893
bool hasFlatScratchSVSSwizzleBug() const
bool hasVDecCoExecHazard() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:369
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:507
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
Definition: GCNSubtarget.h:742
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:413
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:256
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:502
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:491
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:962
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:214
Generation getGeneration() const
Definition: GCNSubtarget.h:279
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasForceStoreSC0SC1() const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:812
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:782
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:792
unsigned getAddressableNumVGPRs() const
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:565
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:675
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:853
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:454
bool hasRFEHazards() const
Definition: GCNSubtarget.h:464
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:460
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:600
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:663
bool enableSIScheduler() const
Definition: GCNSubtarget.h:976
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:786
bool hasAddr64() const
Definition: GCNSubtarget.h:343
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:194
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:161
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:553
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:443
bool isWave64() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:393
bool hasCARRY() const
Definition: GCNSubtarget.h:397
bool hasPackedTID() const
unsigned getNSAMaxSize() const
Definition: GCNSubtarget.h:956
bool hasFP64() const
Definition: GCNSubtarget.h:319
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:671
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:808
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:92
bool hasFractBug() const
Definition: GCNSubtarget.h:357
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:283
bool hasDot4Insts() const
Definition: GCNSubtarget.h:726
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
Definition: GCNSubtarget.h:954
~GCNSubtarget() override
bool hasDot9Insts() const
Definition: GCNSubtarget.h:746
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:619
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:61
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:244
bool hasBFE() const
Definition: GCNSubtarget.h:361
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:658
Itinerary data supplied by a subtarget to be used by a target.
Provides the logic to select generic machine instructions.
Scheduling dependency.
Definition: ScheduleDAG.h:49
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI)
bool isShader(CallingConv::ID cc)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:245
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.