LLVM  16.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMD GCN specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16 
17 #include "AMDGPUCallLowering.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIFrameLowering.h"
20 #include "SIISelLowering.h"
21 #include "SIInstrInfo.h"
23 
24 #define GET_SUBTARGETINFO_HEADER
25 #include "AMDGPUGenSubtargetInfo.inc"
26 
27 namespace llvm {
28 
29 class GCNTargetMachine;
30 
31 class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
32  public AMDGPUSubtarget {
33 public:
35 
36  // The following two enums (TrapHandlerAbi and TrapID) are documented at:
37  // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
38  enum class TrapHandlerAbi {
39  NONE = 0x00,
40  AMDHSA = 0x01,
41  };
42 
43  enum class TrapID { // Trap IDs; documented at https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
44  LLVMAMDHSATrap = 0x02,
45  LLVMAMDHSADebugTrap = 0x03,
46  };
47 
48 private:
49  /// GlobalISel related APIs.
50  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
51  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
52  std::unique_ptr<InstructionSelector> InstSelector;
53  std::unique_ptr<LegalizerInfo> Legalizer;
54  std::unique_ptr<RegisterBankInfo> RegBankInfo;
55 
56 protected:
57  // Basic subtarget description.
60  unsigned Gen = INVALID;
62  int LDSBankCount = 0;
63  unsigned MaxPrivateElementSize = 0;
64 
65  // Possibly statically set by tablegen, but may want to be overridden.
66  bool FastFMAF32 = false;
67  bool FastDenormalF32 = false;
68  bool HalfRate64Ops = false;
69  bool FullRate64Ops = false;
70 
71  // Dynamically set bits that enable features.
72  bool FlatForGlobal = false;
74  bool BackOffBarrier = false;
75  bool UnalignedScratchAccess = false;
76  bool UnalignedAccessMode = false;
77  bool HasApertureRegs = false;
78  bool SupportsXNACK = false;
79 
80  // This should not be used directly. 'TargetID' tracks the dynamic settings
81  // for XNACK.
82  bool EnableXNACK = false;
83 
84  bool EnableTgSplit = false;
85  bool EnableCuMode = false;
86  bool TrapHandler = false;
87 
88  // Used as options.
89  bool EnableLoadStoreOpt = false;
91  bool EnableSIScheduler = false;
92  bool EnableDS128 = false;
93  bool EnablePRTStrictNull = false;
94  bool DumpCode = false;
95 
96  // Subtarget properties statically set by tablegen
97  bool FP64 = false;
98  bool FMA = false;
99  bool MIMG_R128 = false;
100  bool CIInsts = false;
101  bool GFX8Insts = false;
102  bool GFX9Insts = false;
103  bool GFX90AInsts = false;
104  bool GFX940Insts = false;
105  bool GFX10Insts = false;
106  bool GFX11Insts = false;
107  bool GFX10_3Insts = false;
108  bool GFX7GFX8GFX9Insts = false;
109  bool SGPRInitBug = false;
110  bool UserSGPRInit16Bug = false;
113  bool HasSMemRealTime = false;
114  bool HasIntClamp = false;
115  bool HasFmaMixInsts = false;
116  bool HasMovrel = false;
117  bool HasVGPRIndexMode = false;
118  bool HasScalarStores = false;
119  bool HasScalarAtomics = false;
120  bool HasSDWAOmod = false;
121  bool HasSDWAScalar = false;
122  bool HasSDWASdst = false;
123  bool HasSDWAMac = false;
124  bool HasSDWAOutModsVOPC = false;
125  bool HasDPP = false;
126  bool HasDPP8 = false;
127  bool Has64BitDPP = false;
128  bool HasPackedFP32Ops = false;
129  bool HasImageInsts = false;
130  bool HasExtendedImageInsts = false;
131  bool HasR128A16 = false;
132  bool HasGFX10A16 = false;
133  bool HasG16 = false;
134  bool HasNSAEncoding = false;
135  unsigned NSAMaxSize = 0;
136  bool GFX10_AEncoding = false;
137  bool GFX10_BEncoding = false;
138  bool HasDLInsts = false;
139  bool HasDot1Insts = false;
140  bool HasDot2Insts = false;
141  bool HasDot3Insts = false;
142  bool HasDot4Insts = false;
143  bool HasDot5Insts = false;
144  bool HasDot6Insts = false;
145  bool HasDot7Insts = false;
146  bool HasDot8Insts = false;
147  bool HasMAIInsts = false;
148  bool HasFP8Insts = false;
149  bool HasPkFmacF16Inst = false;
150  bool HasAtomicFaddRtnInsts = false;
154  bool SupportsSRAMECC = false;
155 
156  // This should not be used directly. 'TargetID' tracks the dynamic settings
157  // for SRAMECC.
158  bool EnableSRAMECC = false;
159 
160  bool HasNoSdstCMPX = false;
161  bool HasVscnt = false;
162  bool HasGetWaveIdInst = false;
163  bool HasSMemTimeInst = false;
165  bool HasVOP3Literal = false;
166  bool HasNoDataDepHazard = false;
167  bool FlatAddressSpace = false;
168  bool FlatInstOffsets = false;
169  bool FlatGlobalInsts = false;
170  bool FlatScratchInsts = false;
173  bool EnableFlatScratch = false;
174  bool AddNoCarryInsts = false;
175  bool HasUnpackedD16VMem = false;
176  bool LDSMisalignedBug = false;
178  bool UnalignedBufferAccess = false;
179  bool UnalignedDSAccess = false;
180  bool HasPackedTID = false;
181  bool ScalarizeGlobal = false;
182 
186  bool HasInstFwdPrefetchBug = false;
187  bool HasVcmpxExecWARHazard = false;
189  bool HasNSAtoVMEMBug = false;
190  bool HasNSAClauseBug = false;
191  bool HasOffset3fBug = false;
193  bool HasImageStoreD16Bug = false;
194  bool HasImageGather4D16Bug = false;
195  bool HasGFX11FullVGPRs = false;
196  bool HasVOPDInsts = false;
197 
198  // Dummy feature to use for assembler in tablegen.
199  bool FeatureDisable = false;
200 
202 private:
203  SIInstrInfo InstrInfo;
204  SITargetLowering TLInfo;
205  SIFrameLowering FrameLowering;
206 
207 public:
208  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
209  const GCNTargetMachine &TM);
210  ~GCNSubtarget() override;
211 
213  StringRef GPU, StringRef FS);
214 
215  const SIInstrInfo *getInstrInfo() const override {
216  return &InstrInfo;
217  }
218 
219  const SIFrameLowering *getFrameLowering() const override {
220  return &FrameLowering;
221  }
222 
223  const SITargetLowering *getTargetLowering() const override {
224  return &TLInfo;
225  }
226 
227  const SIRegisterInfo *getRegisterInfo() const override {
228  return &InstrInfo.getRegisterInfo();
229  }
230 
231  const CallLowering *getCallLowering() const override {
232  return CallLoweringInfo.get();
233  }
234 
235  const InlineAsmLowering *getInlineAsmLowering() const override {
236  return InlineAsmLoweringInfo.get();
237  }
238 
240  return InstSelector.get();
241  }
242 
243  const LegalizerInfo *getLegalizerInfo() const override {
244  return Legalizer.get();
245  }
246 
247  const RegisterBankInfo *getRegBankInfo() const override {
248  return RegBankInfo.get();
249  }
250 
252  return TargetID;
253  }
254 
255  // Nothing implemented, just prevent crashes on use.
256  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
257  return &TSInfo;
258  }
259 
260  const InstrItineraryData *getInstrItineraryData() const override {
261  return &InstrItins;
262  }
263 
265 
267  return (Generation)Gen;
268  }
269 
270  unsigned getMaxWaveScratchSize() const {
271  // See COMPUTE_TMPRING_SIZE.WAVESIZE. The result is the field's unit size (dwords * 4) times the largest value the field can hold.
272  if (getGeneration() < GFX11) {
273  // Generations before GFX11: 13-bit field in units of 256 dwords.
274  return (256 * 4) * ((1 << 13) - 1);
275  }
276  // GFX11 and later generations: 15-bit field in units of 64 dwords.
277  return (64 * 4) * ((1 << 15) - 1);
278  }
279 
280  /// Return the number of high bits known to be zero for a frame index.
283  }
284 
285  int getLDSBankCount() const {
286  return LDSBankCount;
287  }
288 
289  unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
290  return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16; // Fixed 16 unless the caller asks for the buffer-resource value or flat scratch is not enabled.
291  }
292 
293  unsigned getConstantBusLimit(unsigned Opcode) const;
294 
295  /// Returns if the result of this instruction with a 16-bit result returned in
296  /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
297  /// the original value.
298  bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
299 
300  bool hasIntClamp() const {
301  return HasIntClamp;
302  }
303 
304  bool hasFP64() const {
305  return FP64;
306  }
307 
308  bool hasMIMG_R128() const {
309  return MIMG_R128;
310  }
311 
312  bool hasHWFP64() const {
313  return FP64; // Same flag that hasFP64() reports.
314  }
315 
316  bool hasFastFMAF32() const {
317  return FastFMAF32;
318  }
319 
320  bool hasHalfRate64Ops() const {
321  return HalfRate64Ops;
322  }
323 
324  bool hasFullRate64Ops() const {
325  return FullRate64Ops;
326  }
327 
328  bool hasAddr64() const {
330  }
331 
332  bool hasFlat() const {
334  }
335 
336  // Return true if the target only has the reverse operand versions of VALU
337  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
338  bool hasOnlyRevVALUShifts() const {
339  return getGeneration() >= VOLCANIC_ISLANDS; // VOLCANIC_ISLANDS and later generations.
340  }
341 
342  bool hasFractBug() const {
343  return getGeneration() == SOUTHERN_ISLANDS; // Only the SOUTHERN_ISLANDS generation is flagged.
344  }
345 
346  bool hasBFE() const {
347  return true;
348  }
349 
350  bool hasBFI() const {
351  return true;
352  }
353 
354  bool hasBFM() const {
355  return hasBFE(); // BFM availability follows whatever hasBFE() reports.
356  }
357 
358  bool hasBCNT(unsigned Size) const {
359  return true; // Size is not consulted; every size is reported as supported.
360  }
361 
362  bool hasFFBL() const {
363  return true;
364  }
365 
366  bool hasFFBH() const {
367  return true;
368  }
369 
370  bool hasMed3_16() const {
372  }
373 
374  bool hasMin3Max3_16() const {
376  }
377 
378  bool hasFmaMixInsts() const {
379  return HasFmaMixInsts;
380  }
381 
382  bool hasCARRY() const {
383  return true;
384  }
385 
386  bool hasFMA() const {
387  return FMA;
388  }
389 
390  bool hasSwap() const {
391  return GFX9Insts;
392  }
393 
394  bool hasScalarPackInsts() const {
395  return GFX9Insts;
396  }
397 
398  bool hasScalarMulHiInsts() const {
399  return GFX9Insts;
400  }
401 
404  }
405 
406  bool supportsGetDoorbellID() const { // \returns true if S_GETREG DOORBELL_ID can be used on this subtarget.
407  // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
408  return getGeneration() >= GFX9;
409  }
410 
411  /// True if the offset field of DS instructions works as expected. On SI, the
412  /// offset uses a 16-bit adder and does not always wrap properly.
413  bool hasUsableDSOffset() const {
414  return getGeneration() >= SEA_ISLANDS; // Usable from SEA_ISLANDS onward.
415  }
416 
419  }
420 
421  /// Condition output from div_scale is usable.
423  return getGeneration() != SOUTHERN_ISLANDS;
424  }
425 
426  /// \returns true if an extra wait hazard is needed in some cases before
427  /// s_cbranch_vccnz/s_cbranch_vccz.
428  bool hasReadVCCZBug() const {
429  return getGeneration() <= SEA_ISLANDS; // Generations up to and including SEA_ISLANDS.
430  }
431 
432  /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
434  return getGeneration() >= GFX10;
435  }
436 
437  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
438  /// was written by a VALU instruction.
440  return getGeneration() == SOUTHERN_ISLANDS;
441  }
442 
443  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
444  /// SGPR was written by a VALU Instruction.
446  return getGeneration() >= VOLCANIC_ISLANDS;
447  }
448 
449  bool hasRFEHazards() const {
450  return getGeneration() >= VOLCANIC_ISLANDS;
451  }
452 
453  /// \returns the number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
454  unsigned getSetRegWaitStates() const {
455  return getGeneration() <= SEA_ISLANDS ? 1 : 2; // 1 up to and including SEA_ISLANDS, 2 for later generations.
456  }
457 
458  bool dumpCode() const {
459  return DumpCode;
460  }
461 
462  /// Return the amount of LDS that can be used that will not restrict the
463  /// occupancy lower than WaveCount.
464  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
465  const Function &) const;
466 
469  }
470 
471  /// \returns If target supports S_DENORM_MODE.
472  bool hasDenormModeInst() const {
474  }
475 
476  bool useFlatForGlobal() const {
477  return FlatForGlobal;
478  }
479 
480  /// \returns true if the target supports ds_read/write_b128 and the user has
481  /// enabled generation of ds_read/write_b128.
482  bool useDS128() const {
483  return CIInsts && EnableDS128; // Needs both the CIInsts property and the EnableDS128 option.
484  }
485 
486  /// \return If target supports ds_read/write_b96/128.
487  bool hasDS96AndDS128() const {
488  return CIInsts;
489  }
490 
491  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
492  bool haveRoundOpsF64() const {
493  return CIInsts;
494  }
495 
496  /// \returns If MUBUF instructions always perform range checking, even for
497  /// buffer resources used for private memory access.
500  }
501 
502  /// \returns true if the target requires PRT Strict NULL support (zero result
503  /// registers for sparse texture support).
504  bool usePRTStrictNull() const {
505  return EnablePRTStrictNull;
506  }
507 
510  }
511 
512  /// \returns true if the target supports backing off of s_barrier instructions
513  /// when an exception is raised.
514  bool supportsBackOffBarrier() const {
515  return BackOffBarrier;
516  }
517 
519  return UnalignedBufferAccess;
520  }
521 
524  }
525 
526  bool hasUnalignedDSAccess() const {
527  return UnalignedDSAccess;
528  }
529 
532  }
533 
535  return UnalignedScratchAccess;
536  }
537 
538  bool hasUnalignedAccessMode() const {
539  return UnalignedAccessMode;
540  }
541 
542  bool hasApertureRegs() const {
543  return HasApertureRegs;
544  }
545 
546  bool isTrapHandlerEnabled() const {
547  return TrapHandler;
548  }
549 
550  bool isXNACKEnabled() const {
551  return TargetID.isXnackOnOrAny(); // Queried through TargetID, which tracks the dynamic XNACK setting, not the EnableXNACK member.
552  }
553 
554  bool isTgSplitEnabled() const {
555  return EnableTgSplit;
556  }
557 
558  bool isCuModeEnabled() const {
559  return EnableCuMode;
560  }
561 
562  bool hasFlatAddressSpace() const {
563  return FlatAddressSpace;
564  }
565 
566  bool hasFlatScrRegister() const {
567  return hasFlatAddressSpace(); // Reported exactly when the flat address space is available.
568  }
569 
570  bool hasFlatInstOffsets() const {
571  return FlatInstOffsets;
572  }
573 
574  bool hasFlatGlobalInsts() const {
575  return FlatGlobalInsts;
576  }
577 
578  bool hasFlatScratchInsts() const {
579  return FlatScratchInsts;
580  }
581 
582  // Check if target supports ST addressing mode with FLAT scratch instructions.
583  // The ST addressing mode means no registers are used, either VGPR or SGPR,
584  // but only immediate offset is swizzled and added to the FLAT scratch base.
585  bool hasFlatScratchSTMode() const {
587  }
588 
589  bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; } // True with either the GFX940Insts or the GFX11Insts property.
590 
592  return ScalarFlatScratchInsts;
593  }
594 
595  bool enableFlatScratch() const {
596  return flatScratchIsArchitected() ||
598  }
599 
600  bool hasGlobalAddTidInsts() const {
601  return GFX10_BEncoding;
602  }
603 
604  bool hasAtomicCSub() const {
605  return GFX10_BEncoding;
606  }
607 
609  return getGeneration() >= GFX9;
610  }
611 
612  bool hasFlatSegmentOffsetBug() const {
614  }
615 
617  return getGeneration() > GFX9;
618  }
619 
620  bool hasD16LoadStore() const {
621  return getGeneration() >= GFX9;
622  }
623 
624  bool d16PreservesUnusedBits() const {
626  }
627 
628  bool hasD16Images() const {
629  return getGeneration() >= VOLCANIC_ISLANDS;
630  }
631 
632  /// \returns true if most LDS instructions have an m0 use that requires m0 to
633  /// be initialized.
634  bool ldsRequiresM0Init() const {
635  return getGeneration() < GFX9; // Only generations before GFX9.
636  }
637 
638  // True if the hardware rewinds and replays GWS operations if a wave is
639  // preempted.
640  //
641  // If this is false, a GWS operation requires testing whether a nack set the
642  // MEM_VIOL bit, and repeating the operation if so.
643  bool hasGWSAutoReplay() const {
644  return getGeneration() >= GFX9; // GFX9 and later generations.
645  }
646 
647  /// \returns if target has ds_gws_sema_release_all instruction.
648  bool hasGWSSemaReleaseAll() const {
649  return CIInsts;
650  }
651 
652  /// \returns true if the target has integer add/sub instructions that do not
653  /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
654  /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
655  /// for saturation.
656  bool hasAddNoCarry() const {
657  return AddNoCarryInsts;
658  }
659 
660  bool hasUnpackedD16VMem() const {
661  return HasUnpackedD16VMem;
662  }
663 
664  // Covers VS/PS/CS graphics shaders: true when isMesa3DOS() holds and AMDGPU::isShader accepts F's calling convention.
665  bool isMesaGfxShader(const Function &F) const {
666  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
667  }
668 
669  bool hasMad64_32() const {
670  return getGeneration() >= SEA_ISLANDS;
671  }
672 
673  bool hasSDWAOmod() const {
674  return HasSDWAOmod;
675  }
676 
677  bool hasSDWAScalar() const {
678  return HasSDWAScalar;
679  }
680 
681  bool hasSDWASdst() const {
682  return HasSDWASdst;
683  }
684 
685  bool hasSDWAMac() const {
686  return HasSDWAMac;
687  }
688 
689  bool hasSDWAOutModsVOPC() const {
690  return HasSDWAOutModsVOPC;
691  }
692 
693  bool hasDLInsts() const {
694  return HasDLInsts;
695  }
696 
697  bool hasDot1Insts() const {
698  return HasDot1Insts;
699  }
700 
701  bool hasDot2Insts() const {
702  return HasDot2Insts;
703  }
704 
705  bool hasDot3Insts() const {
706  return HasDot3Insts;
707  }
708 
709  bool hasDot4Insts() const {
710  return HasDot4Insts;
711  }
712 
713  bool hasDot5Insts() const {
714  return HasDot5Insts;
715  }
716 
717  bool hasDot6Insts() const {
718  return HasDot6Insts;
719  }
720 
721  bool hasDot7Insts() const {
722  return HasDot7Insts;
723  }
724 
725  bool hasDot8Insts() const {
726  return HasDot8Insts;
727  }
728 
729  bool hasMAIInsts() const {
730  return HasMAIInsts;
731  }
732 
733  bool hasFP8Insts() const {
734  return HasFP8Insts;
735  }
736 
737  bool hasPkFmacF16Inst() const {
738  return HasPkFmacF16Inst;
739  }
740 
741  bool hasAtomicFaddInsts() const {
743  }
744 
746 
748 
750 
752 
753  bool hasNoSdstCMPX() const {
754  return HasNoSdstCMPX;
755  }
756 
757  bool hasVscnt() const {
758  return HasVscnt;
759  }
760 
761  bool hasGetWaveIdInst() const {
762  return HasGetWaveIdInst;
763  }
764 
765  bool hasSMemTimeInst() const {
766  return HasSMemTimeInst;
767  }
768 
769  bool hasShaderCyclesRegister() const {
771  }
772 
773  bool hasVOP3Literal() const {
774  return HasVOP3Literal;
775  }
776 
777  bool hasNoDataDepHazard() const {
778  return HasNoDataDepHazard;
779  }
780 
782  return getGeneration() < SEA_ISLANDS;
783  }
784 
785  // Scratch is allocated in blocks of 256 dwords per wave for the entire
786  // wavefront. When viewed from the perspective of an arbitrary workitem, this
787  // is 4-byte aligned.
788  //
789  // Only 4-byte alignment is really needed to access anything. Transformations
790  // on the pointer value itself may rely on the alignment / known low bits of
791  // the pointer. Set this to something above the minimum to avoid needing
792  // dynamic realignment in common cases.
793  Align getStackAlignment() const { return Align(16); } // 16 is the "above the minimum" value chosen here.
794 
795  bool enableMachineScheduler() const override {
796  return true;
797  }
798 
799  bool useAA() const override;
800 
801  bool enableSubRegLiveness() const override {
802  return true;
803  }
804 
807 
808  // static wrappers
809  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
810 
811  // XXX - Why is this here if it isn't in the default pass set?
812  bool enableEarlyIfConversion() const override {
813  return true;
814  }
815 
817  unsigned NumRegionInstrs) const override;
818 
819  unsigned getMaxNumUserSGPRs() const {
820  return 16;
821  }
822 
823  bool hasSMemRealTime() const {
824  return HasSMemRealTime;
825  }
826 
827  bool hasMovrel() const {
828  return HasMovrel;
829  }
830 
831  bool hasVGPRIndexMode() const {
832  return HasVGPRIndexMode;
833  }
834 
835  bool useVGPRIndexMode() const;
836 
837  bool hasScalarCompareEq64() const {
838  return getGeneration() >= VOLCANIC_ISLANDS;
839  }
840 
841  bool hasScalarStores() const {
842  return HasScalarStores;
843  }
844 
845  bool hasScalarAtomics() const {
846  return HasScalarAtomics;
847  }
848 
849  bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
850 
851  /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
852  bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
853 
854  /// \returns true if the subtarget has the v_permlane64_b32 instruction.
855  bool hasPermLane64() const { return getGeneration() >= GFX11; }
856 
857  bool hasDPP() const {
858  return HasDPP;
859  }
860 
861  bool hasDPPBroadcasts() const {
862  return HasDPP && getGeneration() < GFX10; // Requires DPP and a generation before GFX10.
863  }
864 
865  bool hasDPPWavefrontShifts() const {
866  return HasDPP && getGeneration() < GFX10; // Requires DPP and a generation before GFX10.
867  }
868 
869  bool hasDPP8() const {
870  return HasDPP8;
871  }
872 
873  bool has64BitDPP() const {
874  return Has64BitDPP;
875  }
876 
877  bool hasPackedFP32Ops() const {
878  return HasPackedFP32Ops;
879  }
880 
881  bool hasFmaakFmamkF32Insts() const {
882  return getGeneration() >= GFX10 || hasGFX940Insts(); // GFX10 and later generations, or any target with GFX940 instructions.
883  }
884 
885  bool hasImageInsts() const {
886  return HasImageInsts;
887  }
888 
889  bool hasExtendedImageInsts() const {
890  return HasExtendedImageInsts;
891  }
892 
893  bool hasR128A16() const {
894  return HasR128A16;
895  }
896 
897  bool hasGFX10A16() const {
898  return HasGFX10A16;
899  }
900 
901  bool hasA16() const { return hasR128A16() || hasGFX10A16(); } // Either A16 variant (R128A16 or GFX10A16) is sufficient.
902 
903  bool hasG16() const { return HasG16; }
904 
905  bool hasOffset3fBug() const {
906  return HasOffset3fBug;
907  }
908 
909  bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
910 
912 
913  bool hasNSAEncoding() const { return HasNSAEncoding; }
914 
915  unsigned getNSAMaxSize() const { return NSAMaxSize; }
916 
917  bool hasGFX10_AEncoding() const {
918  return GFX10_AEncoding;
919  }
920 
921  bool hasGFX10_BEncoding() const {
922  return GFX10_BEncoding;
923  }
924 
925  bool hasGFX10_3Insts() const {
926  return GFX10_3Insts;
927  }
928 
929  bool hasMadF16() const;
930 
931  bool hasMovB64() const { return GFX940Insts; }
932 
933  bool hasLshlAddB64() const { return GFX940Insts; }
934 
935  bool enableSIScheduler() const {
936  return EnableSIScheduler;
937  }
938 
939  bool loadStoreOptEnabled() const {
940  return EnableLoadStoreOpt;
941  }
942 
943  bool hasSGPRInitBug() const {
944  return SGPRInitBug;
945  }
946 
947  bool hasUserSGPRInit16Bug() const {
948  return UserSGPRInit16Bug && isWave32(); // The bug is only reported in wave32 mode.
949  }
950 
952 
955  }
956 
957  bool hasMFMAInlineLiteralBug() const {
959  }
960 
961  bool has12DWordStoreHazard() const {
963  }
964 
965  // \returns true if the subtarget supports DWORDX3 load/store instructions.
966  bool hasDwordx3LoadStores() const {
967  return CIInsts;
968  }
969 
972  }
973 
974  bool hasReadM0SendMsgHazard() const {
977  }
978 
979  bool hasReadM0LdsDmaHazard() const {
981  }
982 
985  }
986 
987  bool hasVcmpxPermlaneHazard() const {
988  return HasVcmpxPermlaneHazard;
989  }
990 
993  }
994 
997  }
998 
999  bool hasLDSMisalignedBug() const {
1000  return LDSMisalignedBug && !EnableCuMode; // Not reported when CU mode is enabled.
1001  }
1002 
1003  bool hasInstFwdPrefetchBug() const {
1004  return HasInstFwdPrefetchBug;
1005  }
1006 
1007  bool hasVcmpxExecWARHazard() const {
1008  return HasVcmpxExecWARHazard;
1009  }
1010 
1013  }
1014 
1015  // The shift amount of a 64-bit shift cannot be the highest allocated register
1016  // if that register is also at the end of the allocation block.
1017  bool hasShift64HighRegBug() const {
1018  return GFX90AInsts && !GFX940Insts; // Targets with GFX90A instructions but without GFX940 instructions.
1019  }
1020 
1021  // Has one cycle hazard on transcendental instruction feeding a
1022  // non transcendental VALU.
1023  bool hasTransForwardingHazard() const { return GFX940Insts; }
1024 
1025  // Has one cycle hazard on a VALU instruction partially writing dst with
1026  // a shift of result bits feeding another VALU instruction.
1027  bool hasDstSelForwardingHazard() const { return GFX940Insts; }
1028 
1029  // Cannot use op_sel with v_dot instructions.
1030  bool hasDOTOpSelHazard() const { return GFX940Insts; }
1031 
1032  // Does not have HW interlocks for VALU writing and then reading SGPRs.
1033  bool hasVDecCoExecHazard() const {
1034  return GFX940Insts;
1035  }
1036 
1037  bool hasNSAtoVMEMBug() const {
1038  return HasNSAtoVMEMBug;
1039  }
1040 
1041  bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1042 
1043  bool hasHardClauses() const { return getGeneration() >= GFX10; }
1044 
1045  bool hasGFX90AInsts() const { return GFX90AInsts; }
1046 
1048  return getGeneration() == GFX10;
1049  }
1050 
1051  bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1052 
1053  bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1054 
1056  return getGeneration() >= GFX11;
1057  }
1058 
1059  bool hasVALUTransUseHazard() const { return getGeneration() >= GFX11; }
1060 
1061  /// Return if operations acting on VGPR tuples require even alignment.
1062  bool needsAlignedVGPRs() const { return GFX90AInsts; }
1063 
1064  /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1065  bool hasSPackHL() const { return GFX11Insts; }
1066 
1067  /// Return true if the target's EXP instruction has the COMPR flag, which
1068  /// affects the meaning of the EN (enable) bits.
1069  bool hasCompressedExport() const { return !GFX11Insts; }
1070 
1071  /// Return true if the target's EXP instruction supports the NULL export
1072  /// target.
1073  bool hasNullExportTarget() const { return !GFX11Insts; }
1074 
1075  bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; }
1076 
1077  bool hasVOPDInsts() const { return HasVOPDInsts; }
1078 
1079  bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; }
1080 
1081  /// Return true if the target has the S_DELAY_ALU instruction.
1082  bool hasDelayAlu() const { return GFX11Insts; }
1083 
1084  bool hasPackedTID() const { return HasPackedTID; }
1085 
1086  // GFX940 is a derivative of GFX90A. hasGFX940Insts() being true implies that
1087  // hasGFX90AInsts() is also true.
1088  bool hasGFX940Insts() const { return GFX940Insts; }
1089 
1090  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1091  /// SGPRs
1092  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1093 
1094  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1095  /// VGPRs
1096  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1097 
1098  /// Return the occupancy for the given function \p F. Uses the LDS size and
1099  /// the numbers of registers if they are provided.
1100  /// Note, occupancy can be affected by the scratch allocation as well, but
1101  /// we do not have enough information to compute it.
1102  unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1103  unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1104 
1105  /// \returns true if the flat_scratch register should be initialized with the
1106  /// pointer to the wave's scratch memory rather than a size and offset.
1107  bool flatScratchIsPointer() const {
1109  }
1110 
1111  /// \returns true if the flat_scratch register is initialized by the HW.
1112  /// In this case it is readonly.
1114 
1115  /// \returns true if the machine has merged shaders in which s0-s7 are
1116  /// reserved by the hardware and user SGPRs start at s8
1117  bool hasMergedShaders() const {
1118  return getGeneration() >= GFX9;
1119  }
1120 
1121  // \returns true if the target supports the pre-NGG legacy geometry path.
1122  bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1123 
1124  /// \returns SGPR allocation granularity supported by the subtarget.
1125  unsigned getSGPRAllocGranule() const {
1127  }
1128 
1129  /// \returns SGPR encoding granularity supported by the subtarget.
1130  unsigned getSGPREncodingGranule() const {
1132  }
1133 
1134  /// \returns Total number of SGPRs supported by the subtarget.
1135  unsigned getTotalNumSGPRs() const {
1136  return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1137  }
1138 
1139  /// \returns Addressable number of SGPRs supported by the subtarget.
1140  unsigned getAddressableNumSGPRs() const {
1142  }
1143 
1144  /// \returns Minimum number of SGPRs that meets the given number of waves per
1145  /// execution unit requirement supported by the subtarget.
1146  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1147  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1148  }
1149 
1150  /// \returns Maximum number of SGPRs that meets the given number of waves per
1151  /// execution unit requirement supported by the subtarget.
1152  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1153  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1154  }
1155 
1156  /// \returns Reserved number of SGPRs. This is common
1157  /// utility function called by MachineFunction and
1158  /// Function variants of getReservedNumSGPRs.
1159  unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1160  /// \returns Reserved number of SGPRs for given machine function \p MF.
1161  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1162 
1163  /// \returns Reserved number of SGPRs for given function \p F.
1164  unsigned getReservedNumSGPRs(const Function &F) const;
1165 
1166  /// \returns max num SGPRs. This is the common utility
1167  /// function called by MachineFunction and Function
1168  /// variants of getMaxNumSGPRs.
1169  unsigned getBaseMaxNumSGPRs(const Function &F,
1170  std::pair<unsigned, unsigned> WavesPerEU,
1171  unsigned PreloadedSGPRs,
1172  unsigned ReservedNumSGPRs) const;
1173 
1174  /// \returns Maximum number of SGPRs that meets number of waves per execution
1175  /// unit requirement for function \p MF, or number of SGPRs explicitly
1176  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1177  ///
1178  /// \returns Value that meets number of waves per execution unit requirement
1179  /// if explicitly requested value cannot be converted to integer, violates
1180  /// subtarget's specifications, or does not meet number of waves per execution
1181  /// unit requirement.
1182  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1183 
1184  /// \returns Maximum number of SGPRs that meets number of waves per execution
1185  /// unit requirement for function \p F, or number of SGPRs explicitly
1186  /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1187  ///
1188  /// \returns Value that meets number of waves per execution unit requirement
1189  /// if explicitly requested value cannot be converted to integer, violates
1190  /// subtarget's specifications, or does not meet number of waves per execution
1191  /// unit requirement.
1192  unsigned getMaxNumSGPRs(const Function &F) const;
1193 
1194  /// \returns VGPR allocation granularity supported by the subtarget.
1195  unsigned getVGPRAllocGranule() const {
1197  }
1198 
1199  /// \returns VGPR encoding granularity supported by the subtarget.
1200  unsigned getVGPREncodingGranule() const {
1202  }
1203 
1204  /// \returns Total number of VGPRs supported by the subtarget.
1205  unsigned getTotalNumVGPRs() const {
1206  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1207  }
1208 
1209  /// \returns Addressable number of VGPRs supported by the subtarget.
1210  unsigned getAddressableNumVGPRs() const {
1212  }
1213 
1214  /// \returns Minimum number of VGPRs that meets given number of waves per
1215  /// execution unit requirement supported by the subtarget.
1216  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1217  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1218  }
1219 
1220  /// \returns Maximum number of VGPRs that meets given number of waves per
1221  /// execution unit requirement supported by the subtarget.
1222  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1223  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1224  }
1225 
1226  /// \returns max num VGPRs. This is the common utility function
1227  /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1228  unsigned getBaseMaxNumVGPRs(const Function &F,
1229  std::pair<unsigned, unsigned> WavesPerEU) const;
1230  /// \returns Maximum number of VGPRs that meets number of waves per execution
1231  /// unit requirement for function \p F, or number of VGPRs explicitly
1232  /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1233  ///
1234  /// \returns Value that meets number of waves per execution unit requirement
1235  /// if explicitly requested value cannot be converted to integer, violates
1236  /// subtarget's specifications, or does not meet number of waves per execution
1237  /// unit requirement.
1238  unsigned getMaxNumVGPRs(const Function &F) const;
1239 
1240  unsigned getMaxNumAGPRs(const Function &F) const {
1241  return getMaxNumVGPRs(F); // The AGPR limit is taken to be the same as the VGPR limit for F.
1242  }
1243 
1244  /// \returns Maximum number of VGPRs that meets number of waves per execution
1245  /// unit requirement for function \p MF, or number of VGPRs explicitly
1246  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1247  ///
1248  /// \returns Value that meets number of waves per execution unit requirement
1249  /// if explicitly requested value cannot be converted to integer, violates
1250  /// subtarget's specifications, or does not meet number of waves per execution
1251  /// unit requirement.
1252  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1253 
1254  void getPostRAMutations(
1255  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1256  const override;
1257 
1258  std::unique_ptr<ScheduleDAGMutation>
1260 
1261  bool isWave32() const { // \returns true if the wavefront size is 32.
1262  return getWavefrontSize() == 32;
1263  }
1264 
1265  bool isWave64() const { // \returns true if the wavefront size is 64.
1266  return getWavefrontSize() == 64;
1267  }
1268 
1270  return getRegisterInfo()->getBoolRC();
1271  }
1272 
1273  /// \returns Maximum number of work groups per compute unit supported by the
1274  /// subtarget and limited by given \p FlatWorkGroupSize.
1275  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1276  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1277  }
1278 
1279  /// \returns Minimum flat work group size supported by the subtarget.
1280  unsigned getMinFlatWorkGroupSize() const override {
1282  }
1283 
1284  /// \returns Maximum flat work group size supported by the subtarget.
1285  unsigned getMaxFlatWorkGroupSize() const override {
1287  }
1288 
1289  /// \returns Number of waves per execution unit required to support the given
1290  /// \p FlatWorkGroupSize.
1291  unsigned
1292  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1293  return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1294  }
1295 
1296  /// \returns Minimum number of waves per execution unit supported by the
1297  /// subtarget.
1298  unsigned getMinWavesPerEU() const override {
1299  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1300  }
1301 
1302  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1303  SDep &Dep) const override;
1304 
1305  // \returns true if it's beneficial on this subtarget for the scheduler to
1306  // cluster stores as well as loads.
1307  bool shouldClusterStores() const { return getGeneration() >= GFX11; } // Enabled for GFX11 and later generations.
1308 
1309  // \returns the number of address arguments from which to enable MIMG NSA
1310  // on supported architectures.
1311  unsigned getNSAThreshold(const MachineFunction &MF) const;
1312 };
1313 
1314 } // end namespace llvm
1315 
1316 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
llvm::GCNSubtarget::shouldClusterStores
bool shouldClusterStores() const
Definition: GCNSubtarget.h:1307
llvm::GCNSubtarget::hasScalarMulHiInsts
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:398
llvm::GCNSubtarget::HasDot3Insts
bool HasDot3Insts
Definition: GCNSubtarget.h:141
llvm::GCNSubtarget::hasReadM0LdsDmaHazard
bool hasReadM0LdsDmaHazard() const
Definition: GCNSubtarget.h:979
llvm::GCNSubtarget::hasVDecCoExecHazard
bool hasVDecCoExecHazard() const
Definition: GCNSubtarget.h:1033
llvm::GCNSubtarget::HasImageInsts
bool HasImageInsts
Definition: GCNSubtarget.h:129
llvm::GCNSubtarget::Gen
unsigned Gen
Definition: GCNSubtarget.h:60
llvm::GCNSubtarget::hasGFX10A16
bool hasGFX10A16() const
Definition: GCNSubtarget.h:897
llvm::GCNSubtarget::hasBFM
bool hasBFM() const
Definition: GCNSubtarget.h:354
llvm::GCNSubtarget::hasPermLane64
bool hasPermLane64() const
Definition: GCNSubtarget.h:855
llvm::GCNSubtarget::hasDot2Insts
bool hasDot2Insts() const
Definition: GCNSubtarget.h:701
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::AMDGPU::IsaInfo::getSGPRAllocGranule
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:735
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::GCNSubtarget::GFX8Insts
bool GFX8Insts
Definition: GCNSubtarget.h:101
llvm::GCNSubtarget::hasGWSAutoReplay
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:643
llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:616
llvm::GCNSubtarget::HasSDWAScalar
bool HasSDWAScalar
Definition: GCNSubtarget.h:121
llvm::GCNSubtarget::TrapHandlerAbi
TrapHandlerAbi
Definition: GCNSubtarget.h:38
llvm::GCNSubtarget::HasGetWaveIdInst
bool HasGetWaveIdInst
Definition: GCNSubtarget.h:162
llvm::GCNSubtarget::getRegBankInfo
const RegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:247
llvm::GCNSubtarget::hasSDWAMac
bool hasSDWAMac() const
Definition: GCNSubtarget.h:685
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumSGPRs
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
Definition: AMDGPUMetadata.h:258
llvm::GCNSubtarget::hasDstSelForwardingHazard
bool hasDstSelForwardingHazard() const
Definition: GCNSubtarget.h:1027
llvm::GCNSubtarget::hasVGPRIndexMode
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:831
llvm::InlineAsmLowering
Definition: InlineAsmLowering.h:28
llvm::GCNSubtarget::hasSDWASdst
bool hasSDWASdst() const
Definition: GCNSubtarget.h:681
llvm::GCNSubtarget::getFrameLowering
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:219
llvm::GCNSubtarget::hasFP8Insts
bool hasFP8Insts() const
Definition: GCNSubtarget.h:733
llvm::GCNSubtarget::initializeSubtargetDependencies
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Definition: AMDGPUSubtarget.cpp:64
llvm::GCNSubtarget::hasD16Images
bool hasD16Images() const
Definition: GCNSubtarget.h:628
llvm::GCNSubtarget::GFX11Insts
bool GFX11Insts
Definition: GCNSubtarget.h:106
llvm::GCNSubtarget::EnablePRTStrictNull
bool EnablePRTStrictNull
Definition: GCNSubtarget.h:93
llvm::Function
Definition: Function.h:60
llvm::GCNSubtarget::HasDot2Insts
bool HasDot2Insts
Definition: GCNSubtarget.h:140
llvm::GCNSubtarget::hasImageInsts
bool hasImageInsts() const
Definition: GCNSubtarget.h:885
llvm::GCNSubtarget::HasPackedFP32Ops
bool HasPackedFP32Ops
Definition: GCNSubtarget.h:128
llvm::GCNSubtarget::hasVALUPartialForwardingHazard
bool hasVALUPartialForwardingHazard() const
Definition: GCNSubtarget.h:1055
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isSramEccOnOrAny
bool isSramEccOnOrAny() const
Definition: AMDGPUBaseInfo.h:150
llvm::GCNSubtarget::FlatGlobalInsts
bool FlatGlobalInsts
Definition: GCNSubtarget.h:169
llvm::GCNSubtarget::FlatAddressSpace
bool FlatAddressSpace
Definition: GCNSubtarget.h:167
llvm::GCNSubtarget::HasDLInsts
bool HasDLInsts
Definition: GCNSubtarget.h:138
llvm::GCNSubtarget::hasNSAClauseBug
bool hasNSAClauseBug() const
Definition: GCNSubtarget.h:1041
llvm::GCNSubtarget::getNSAMaxSize
unsigned getNSAMaxSize() const
Definition: GCNSubtarget.h:915
llvm::GCNSubtarget::hasMovrel
bool hasMovrel() const
Definition: GCNSubtarget.h:827
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:37
llvm::GCNSubtarget::hasMovB64
bool hasMovB64() const
Definition: GCNSubtarget.h:931
llvm::GCNSubtarget::UserSGPRInit16Bug
bool UserSGPRInit16Bug
Definition: GCNSubtarget.h:110
llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:445
llvm::GCNSubtarget::TrapHandlerAbi::NONE
@ NONE
llvm::GCNSubtarget::hasPermLaneX16
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:852
llvm::GCNSubtarget::hasShaderCyclesRegister
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:769
llvm::GCNSubtarget::needsAlignedVGPRs
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
Definition: GCNSubtarget.h:1062
llvm::GCNSubtarget::hasFlatScratchInsts
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:578
llvm::GCNSubtarget::UnalignedDSAccess
bool UnalignedDSAccess
Definition: GCNSubtarget.h:179
llvm::GCNSubtarget::hasFP64
bool hasFP64() const
Definition: GCNSubtarget.h:304
llvm::GCNSubtarget::InstrItins
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:61
llvm::GCNSubtarget::HasImageStoreD16Bug
bool HasImageStoreD16Bug
Definition: GCNSubtarget.h:193
llvm::GCNSubtarget::hasAutoWaitcntBeforeBarrier
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:508
llvm::GCNSubtarget::supportsMinMaxDenormModes
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:467
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:612
llvm::GCNSubtarget::HasDot4Insts
bool HasDot4Insts
Definition: GCNSubtarget.h:142
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::GCNSubtarget::hasDS96AndDS128
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:487
llvm::GCNSubtarget::HasVcmpxPermlaneHazard
bool HasVcmpxPermlaneHazard
Definition: GCNSubtarget.h:183
llvm::GCNSubtarget::hasSPackHL
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
Definition: GCNSubtarget.h:1065
llvm::GCNSubtarget::getSetRegWaitStates
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:454
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::GCNSubtarget::hasVOPDInsts
bool hasVOPDInsts() const
Definition: GCNSubtarget.h:1077
llvm::GCNSubtarget::HasExtendedImageInsts
bool HasExtendedImageInsts
Definition: GCNSubtarget.h:130
llvm::GCNSubtarget::hasFlatGlobalInsts
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:574
llvm::GCNSubtarget::hasCARRY
bool hasCARRY() const
Definition: GCNSubtarget.h:382
llvm::GCNSubtarget::hasShift64HighRegBug
bool hasShift64HighRegBug() const
Definition: GCNSubtarget.h:1017
llvm::GCNSubtarget::useDS128
bool useDS128() const
Definition: GCNSubtarget.h:482
llvm::GCNSubtarget::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1275
llvm::GCNSubtarget::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1285
llvm::GCNSubtarget::hasGFX11FullVGPRs
bool hasGFX11FullVGPRs() const
Definition: GCNSubtarget.h:1075
llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:546
llvm::GCNSubtarget::hasDot3Insts
bool hasDot3Insts() const
Definition: GCNSubtarget.h:705
llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:868
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:693
llvm::GCNSubtarget::hasFractBug
bool hasFractBug() const
Definition: GCNSubtarget.h:342
llvm::GCNSubtarget::hasDwordx3LoadStores
bool hasDwordx3LoadStores() const
Definition: GCNSubtarget.h:966
llvm::GCNSubtarget::hasNSAEncoding
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:913
llvm::GCNSubtarget::TrapID::LLVMAMDHSADebugTrap
@ LLVMAMDHSADebugTrap
llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:255
llvm::GCNSubtarget::HasSDWAOutModsVOPC
bool HasSDWAOutModsVOPC
Definition: GCNSubtarget.h:124
llvm::AMDGPU::IsaInfo::getMinWavesPerEU
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:702
llvm::GCNSubtarget::getStackAlignment
Align getStackAlignment() const
Definition: GCNSubtarget.h:793
llvm::GCNSubtarget::hasUnalignedDSAccessEnabled
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:530
llvm::GCNSubtarget::EnableSIScheduler
bool EnableSIScheduler
Definition: GCNSubtarget.h:91
llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:433
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::GCNSubtarget::HasFlatSegmentOffsetBug
bool HasFlatSegmentOffsetBug
Definition: GCNSubtarget.h:192
llvm::GCNSubtarget::loadStoreOptEnabled
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:939
llvm::GCNSubtarget::TargetID
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:59
llvm::GCNSubtarget::HasDot8Insts
bool HasDot8Insts
Definition: GCNSubtarget.h:146
llvm::GCNSubtarget::UnalignedBufferAccess
bool UnalignedBufferAccess
Definition: GCNSubtarget.h:178
llvm::GCNSubtarget::hasGetWaveIdInst
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:761
llvm::GCNSubtarget::HasNSAtoVMEMBug
bool HasNSAtoVMEMBug
Definition: GCNSubtarget.h:189
llvm::GCNSubtarget::HasScalarAtomics
bool HasScalarAtomics
Definition: GCNSubtarget.h:119
llvm::GCNSubtarget::ScalarFlatScratchInsts
bool ScalarFlatScratchInsts
Definition: GCNSubtarget.h:171
llvm::AMDGPU::IsaInfo::getMaxNumVGPRs
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:896
llvm::GCNSubtarget::hasPackedFP32Ops
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:877
llvm::GCNSubtarget::HalfRate64Ops
bool HalfRate64Ops
Definition: GCNSubtarget.h:68
llvm::GCNSubtarget::NegativeScratchOffsetBug
bool NegativeScratchOffsetBug
Definition: GCNSubtarget.h:111
llvm::GCNSubtarget::HasNSAClauseBug
bool HasNSAClauseBug
Definition: GCNSubtarget.h:190
llvm::GCNSubtarget::hasScalarCompareEq64
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:837
llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:690
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:227
llvm::GCNSubtarget::hasCompressedExport
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
Definition: GCNSubtarget.h:1069
llvm::GCNSubtarget::hasLDSFPAtomicAdd
bool hasLDSFPAtomicAdd() const
Definition: GCNSubtarget.h:849
llvm::AMDGPUSubtarget::GFX11
@ GFX11
Definition: AMDGPUSubtarget.h:42
llvm::GCNSubtarget::HasOffset3fBug
bool HasOffset3fBug
Definition: GCNSubtarget.h:191
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GCNSubtarget::UnalignedAccessMode
bool UnalignedAccessMode
Definition: GCNSubtarget.h:76
llvm::GCNSubtarget::hasLdsDirect
bool hasLdsDirect() const
Definition: GCNSubtarget.h:1053
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:215
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:417
llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:522
llvm::GCNSubtarget::hasHalfRate64Ops
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:320
llvm::GCNSubtarget::hasGFX10_AEncoding
bool hasGFX10_AEncoding() const
Definition: GCNSubtarget.h:917
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:648
llvm::GCNSubtarget::getMaxNumAGPRs
unsigned getMaxNumAGPRs(const Function &F) const
Definition: GCNSubtarget.h:1240
llvm::GCNSubtarget::HasSDWASdst
bool HasSDWASdst
Definition: GCNSubtarget.h:122
llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:744
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:120
llvm::GCNSubtarget::HasNoDataDepHazard
bool HasNoDataDepHazard
Definition: GCNSubtarget.h:166
llvm::GCNSubtarget::HasVGPRIndexMode
bool HasVGPRIndexMode
Definition: GCNSubtarget.h:117
llvm::GCNSubtarget::overrideSchedPolicy
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
Definition: AMDGPUSubtarget.cpp:575
llvm::GCNSubtarget::BackOffBarrier
bool BackOffBarrier
Definition: GCNSubtarget.h:74
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:105
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:98
llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:281
llvm::GCNSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
llvm::GCNSubtarget::getMaxWaveScratchSize
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:270
llvm::GCNSubtarget::hasFlatScrRegister
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:566
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1672
llvm::GCNSubtarget::hasFastFMAF32
bool hasFastFMAF32() const
Definition: GCNSubtarget.h:316
llvm::GCNSubtarget::hasA16
bool hasA16() const
Definition: GCNSubtarget.h:901
llvm::GCNSubtarget::GFX10_3Insts
bool GFX10_3Insts
Definition: GCNSubtarget.h:107
llvm::GCNSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:795
llvm::GCNSubtarget::hasBFI
bool hasBFI() const
Definition: GCNSubtarget.h:350
llvm::GCNSubtarget::GFX940Insts
bool GFX940Insts
Definition: GCNSubtarget.h:104
llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition: GCNSubtarget.h:1152
llvm::GCNSubtarget::useVGPRIndexMode
bool useVGPRIndexMode() const
Definition: AMDGPUSubtarget.cpp:596
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:328
llvm::GCNSubtarget::EnableLoadStoreOpt
bool EnableLoadStoreOpt
Definition: GCNSubtarget.h:89
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:476
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:378
llvm::GCNSubtarget::HasSMemRealTime
bool HasSMemRealTime
Definition: GCNSubtarget.h:113
llvm::Legalizer
Definition: Legalizer.h:36
llvm::GCNSubtarget::hasUnalignedAccessMode
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:538
llvm::GCNSubtarget::hasScalarFlatScratchInsts
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:591
llvm::GCNSubtarget::getTargetLowering
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:223
llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:128
llvm::GCNSubtarget::hasSDWAOmod
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:673
llvm::AMDGPU::IsaInfo::getMaxNumSGPRs
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Definition: AMDGPUBaseInfo.cpp:784
llvm::GCNSubtarget::hasSMRDReadVALUDefHazard
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:439
llvm::GCNSubtarget::getOccupancyWithNumSGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
Definition: AMDGPUSubtarget.cpp:602
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::GCNSubtarget::getInlineAsmLowering
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:235
llvm::GCNSubtarget::HasArchitectedFlatScratch
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:172
b
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int b
Definition: README.txt:418
llvm::GCNSubtarget::hasR128A16
bool hasR128A16() const
Definition: GCNSubtarget.h:893
llvm::GCNSubtarget::getBaseMaxNumSGPRs
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
Definition: AMDGPUSubtarget.cpp:680
llvm::GCNSubtarget::hasVscnt
bool hasVscnt() const
Definition: GCNSubtarget.h:757
AMDGPUSubtarget.h
llvm::GCNSubtarget::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1280
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isXnackOnOrAny
bool isXnackOnOrAny() const
Definition: AMDGPUBaseInfo.h:121
llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition: SelectionDAGTargetInfo.h:31
llvm::GCNSubtarget::getMaxPrivateElementSize
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:289
llvm::GCNSubtarget::hasGFX940Insts
bool hasGFX940Insts() const
Definition: GCNSubtarget.h:1088
llvm::GCNSubtarget::hasVOP3Literal
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:773
llvm::GCNSubtarget::SupportsXNACK
bool SupportsXNACK
Definition: GCNSubtarget.h:78
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::GCNSubtarget::hasFFBH
bool hasFFBH() const
Definition: GCNSubtarget.h:366
llvm::GCNSubtarget::hasSMemTimeInst
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:765
llvm::GCNSubtarget::hasReadVCCZBug
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:428
llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1261
llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:943
llvm::GCNSubtarget::AutoWaitcntBeforeBarrier
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:73
llvm::GCNSubtarget::EnableTgSplit
bool EnableTgSplit
Definition: GCNSubtarget.h:84
llvm::GCNSubtarget::hasSMemRealTime
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:823
llvm::GCNSubtarget::hasMed3_16
bool hasMed3_16() const
Definition: GCNSubtarget.h:370
llvm::GCNSubtarget::getInstructionSelector
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:239
llvm::GCNSubtarget::isMesaGfxShader
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:665
llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:721
llvm::GCNSubtarget::hasGFX10_BEncoding
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:921
llvm::GCNSubtarget::HasVOPDInsts
bool HasVOPDInsts
Definition: GCNSubtarget.h:196
llvm::GCNSubtarget::TrapHandler
bool TrapHandler
Definition: GCNSubtarget.h:86
llvm::GCNSubtarget::hasPkFmacF16Inst
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:737
llvm::GCNSubtarget::TrapID
TrapID
Definition: GCNSubtarget.h:43
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:554
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::SIFrameLowering
Definition: SIFrameLowering.h:16
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
SIISelLowering.h
llvm::GCNSubtarget::hasFlatScratchSVSSwizzleBug
bool hasFlatScratchSVSSwizzleBug() const
Definition: GCNSubtarget.h:1079
llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1222
llvm::GCNSubtarget::hasDot6Insts
bool hasDot6Insts() const
Definition: GCNSubtarget.h:717
llvm::GCNSubtarget::hasFlatAtomicFaddF32Inst
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:751
llvm::GCNSubtarget::LDSBankCount
int LDSBankCount
Definition: GCNSubtarget.h:62
llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:458
llvm::GCNSubtarget::hasScalarAtomics
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:845
llvm::GCNSubtarget::hasUnalignedDSAccess
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:526
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:806
llvm::GCNSubtarget::hasTransForwardingHazard
bool hasTransForwardingHazard() const
Definition: GCNSubtarget.h:1023
llvm::GCNSubtarget::MIMG_R128
bool MIMG_R128
Definition: GCNSubtarget.h:99
llvm::GCNSubtarget::hasExtendedImageInsts
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:889
llvm::GCNSubtarget::hasDot1Insts
bool hasDot1Insts() const
Definition: GCNSubtarget.h:697
llvm::GCNSubtarget::hasOffset3fBug
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:905
llvm::GCNSubtarget::hasVcmpxExecWARHazard
bool hasVcmpxExecWARHazard() const
Definition: GCNSubtarget.h:1007
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:200
llvm::GCNSubtarget::getVGPREncodingGranule
unsigned getVGPREncodingGranule() const
Definition: GCNSubtarget.h:1200
llvm::GCNSubtarget::isWave64
bool isWave64() const
Definition: GCNSubtarget.h:1265
llvm::GCNSubtarget::HasLdsBranchVmemWARHazard
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:188
llvm::GCNSubtarget::EnableUnsafeDSOffsetFolding
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:90
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::GCNSubtarget::FMA
bool FMA
Definition: GCNSubtarget.h:98
llvm::GCNSubtarget::FullRate64Ops
bool FullRate64Ops
Definition: GCNSubtarget.h:69
llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:243
llvm::GCNSubtarget::AddNoCarryInsts
bool AddNoCarryInsts
Definition: GCNSubtarget.h:174
llvm::GCNSubtarget::hasNegativeUnalignedScratchOffsetBug
bool hasNegativeUnalignedScratchOffsetBug() const
Definition: GCNSubtarget.h:953
llvm::GCNSubtarget::usePRTStrictNull
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:504
llvm::GCNSubtarget::hasAtomicPkFaddNoRtnInsts
bool hasAtomicPkFaddNoRtnInsts() const
Definition: GCNSubtarget.h:749
llvm::GCNSubtarget::hasInstFwdPrefetchBug
bool hasInstFwdPrefetchBug() const
Definition: GCNSubtarget.h:1003
llvm::GCNSubtarget::FastDenormalF32
bool FastDenormalF32
Definition: GCNSubtarget.h:67
llvm::GCNSubtarget::HasPackedTID
bool HasPackedTID
Definition: GCNSubtarget.h:180
llvm::GCNSubtarget::GFX10_BEncoding
bool GFX10_BEncoding
Definition: GCNSubtarget.h:137
llvm::RegisterBankInfo
Holds all the information related to register banks.
Definition: RegisterBankInfo.h:39
llvm::InstructionSelector
Provides the logic to select generic machine instructions.
Definition: InstructionSelector.h:428
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
llvm::AMDGPU::IsaInfo::getMinNumSGPRs
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:767
llvm::GCNSubtarget::HasUnpackedD16VMem
bool HasUnpackedD16VMem
Definition: GCNSubtarget.h:175
llvm::GCNSubtarget::getSGPRAllocGranule
unsigned getSGPRAllocGranule() const
Definition: GCNSubtarget.h:1125
llvm::GCNSubtarget::hasImageGather4D16Bug
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:911
llvm::GCNSubtarget::hasPackedTID
bool hasPackedTID() const
Definition: GCNSubtarget.h:1084
llvm::GCNSubtarget::getTrapHandlerAbi
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:402
llvm::GCNSubtarget::HasFmaMixInsts
bool HasFmaMixInsts
Definition: GCNSubtarget.h:115
llvm::GCNSubtarget::HasAtomicPkFaddNoRtnInsts
bool HasAtomicPkFaddNoRtnInsts
Definition: GCNSubtarget.h:152
llvm::GCNSubtarget::has64BitDPP
bool has64BitDPP() const
Definition: GCNSubtarget.h:873
llvm::GCNSubtarget::GFX90AInsts
bool GFX90AInsts
Definition: GCNSubtarget.h:103
llvm::GCNSubtarget::HasIntClamp
bool HasIntClamp
Definition: GCNSubtarget.h:114
llvm::GCNSubtarget::hasNegativeScratchOffsetBug
bool hasNegativeScratchOffsetBug() const
Definition: GCNSubtarget.h:951
llvm::GCNSubtarget::hasVMEMtoScalarWriteHazard
bool hasVMEMtoScalarWriteHazard() const
Definition: GCNSubtarget.h:991
llvm::GCNSubtarget::NSAMaxSize
unsigned NSAMaxSize
Definition: GCNSubtarget.h:135
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:656
llvm::GCNSubtarget::hasVOP3DPP
bool hasVOP3DPP() const
Definition: GCNSubtarget.h:1051
llvm::GCNSubtarget::HasShaderCyclesRegister
bool HasShaderCyclesRegister
Definition: GCNSubtarget.h:164
llvm::GCNSubtarget::hasUnalignedBufferAccess
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:518
llvm::GCNSubtarget::ScalarizeGlobal
bool ScalarizeGlobal
Definition: GCNSubtarget.h:181
llvm::GCNSubtarget::LDSMisalignedBug
bool LDSMisalignedBug
Definition: GCNSubtarget.h:176
llvm::GCNSubtarget::getTotalNumVGPRs
unsigned getTotalNumVGPRs() const
Definition: GCNSubtarget.h:1205
llvm::GCNSubtarget::GCNSubtarget
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
Definition: AMDGPUSubtarget.cpp:163
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:498
llvm::GCNSubtarget::HasVMEMtoScalarWriteHazard
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:184
llvm::GCNSubtarget::hasLDSMisalignedBug
bool hasLDSMisalignedBug() const
Definition: GCNSubtarget.h:999
llvm::GCNSubtarget::hasMFMAInlineLiteralBug
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:957
llvm::GCNSubtarget::HasImageGather4D16Bug
bool HasImageGather4D16Bug
Definition: GCNSubtarget.h:194
llvm::GCNSubtarget::FeatureDisable
bool FeatureDisable
Definition: GCNSubtarget.h:199
llvm::GCNSubtarget::EnableXNACK
bool EnableXNACK
Definition: GCNSubtarget.h:82
llvm::GCNSubtarget::hasFlat
bool hasFlat() const
Definition: GCNSubtarget.h:332
llvm::GCNSubtarget::hasImageStoreD16Bug
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:909
llvm::GCNSubtarget::hasGlobalAddTidInsts
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:600
llvm::GCNSubtarget::hasSDWAScalar
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:677
llvm::GCNSubtarget::HasAtomicFaddNoRtnInsts
bool HasAtomicFaddNoRtnInsts
Definition: GCNSubtarget.h:151
llvm::SIRegisterInfo::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: SIRegisterInfo.h:327
llvm::GCNSubtarget::hasFlatScratchSVSMode
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:589
llvm::GCNSubtarget::hasFmaakFmamkF32Insts
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:881
llvm::GCNSubtarget::hasAtomicFaddNoRtnInsts
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:747
llvm::GCNSubtarget::SupportsSRAMECC
bool SupportsSRAMECC
Definition: GCNSubtarget.h:154
llvm::GCNSubtarget::EnableCuMode
bool EnableCuMode
Definition: GCNSubtarget.h:85
llvm::GCNSubtarget::HasDot6Insts
bool HasDot6Insts
Definition: GCNSubtarget.h:144
llvm::GCNSubtarget::hasNullExportTarget
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
Definition: GCNSubtarget.h:1073
llvm::X86AS::FS
@ FS
Definition: X86.h:200
llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1216
llvm::GCNSubtarget::hasReadM0MovRelInterpHazard
bool hasReadM0MovRelInterpHazard() const
Definition: GCNSubtarget.h:970
llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:1140
llvm::GCNSubtarget::hasUnalignedScratchAccess
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:534
llvm::GCNSubtarget::hasDot7Insts
bool hasDot7Insts() const
Definition: GCNSubtarget.h:721
llvm::GCNSubtarget::TSInfo
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:201
llvm::GCNSubtarget::hasRFEHazards
bool hasRFEHazards() const
Definition: GCNSubtarget.h:449
llvm::GCNSubtarget::HasVcmpxExecWARHazard
bool HasVcmpxExecWARHazard
Definition: GCNSubtarget.h:187
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:1045
llvm::GCNSubtarget::getOccupancyWithNumVGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
Definition: AMDGPUSubtarget.cpp:628
llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:748
llvm::GCNSubtarget::hasNoDataDepHazard
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:777
llvm::GCNSubtarget::enableEarlyIfConversion
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:812
llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:755
llvm::GCNSubtarget::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1292
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:285
llvm::GCNSubtarget::MaxPrivateElementSize
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:63
llvm::GCNSubtarget::hasBFE
bool hasBFE() const
Definition: GCNSubtarget.h:346
llvm::GCNSubtarget::hasDOTOpSelHazard
bool hasDOTOpSelHazard() const
Definition: GCNSubtarget.h:1030
SIInstrInfo.h
llvm::GCNSubtarget::hasFMA
bool hasFMA() const
Definition: GCNSubtarget.h:386
llvm::GCNSubtarget::hasLdsBranchVmemWARHazard
bool hasLdsBranchVmemWARHazard() const
Definition: GCNSubtarget.h:1011
llvm::GCNSubtarget::HasPkFmacF16Inst
bool HasPkFmacF16Inst
Definition: GCNSubtarget.h:149
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::GCNSubtarget::HasFlatAtomicFaddF32Inst
bool HasFlatAtomicFaddF32Inst
Definition: GCNSubtarget.h:153
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:75
llvm::GCNSubtarget::hasIntClamp
bool hasIntClamp() const
Definition: GCNSubtarget.h:300
llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:608
llvm::GCNSubtarget::hasFPAtomicToDenormModeHazard
bool hasFPAtomicToDenormModeHazard() const
Definition: GCNSubtarget.h:1047
llvm::GCNSubtarget::hasDenormModeInst
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:472
llvm::GCNSubtarget::hasAtomicFaddInsts
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:741
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:570
llvm::GCNSubtarget::hasDot5Insts
bool hasDot5Insts() const
Definition: GCNSubtarget.h:713
llvm::GCNSubtarget::hasScalarStores
bool hasScalarStores() const
Definition: GCNSubtarget.h:841
llvm::GCNSubtarget::hasGFX10_3Insts
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:925
llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:856
llvm::GCNSubtarget::HasScalarStores
bool HasScalarStores
Definition: GCNSubtarget.h:118
llvm::GCNSubtarget::flatScratchIsPointer
bool flatScratchIsPointer() const
Definition: GCNSubtarget.h:1107
llvm::GCNSubtarget::supportsGetDoorbellID
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:406
llvm::GCNSubtarget::hasDPPBroadcasts
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:861
llvm::GCNSubtarget::hasMadF16
bool hasMadF16() const
Definition: AMDGPUSubtarget.cpp:592
SIFrameLowering.h
llvm::GCNSubtarget::GFX7GFX8GFX9Insts
bool GFX7GFX8GFX9Insts
Definition: GCNSubtarget.h:108
llvm::GCNSubtarget::HasSDWAOmod
bool HasSDWAOmod
Definition: GCNSubtarget.h:120
llvm::GCNSubtarget::GFX10_AEncoding
bool GFX10_AEncoding
Definition: GCNSubtarget.h:136
llvm::GCNSubtarget::hasVALUTransUseHazard
bool hasVALUTransUseHazard() const
Definition: GCNSubtarget.h:1059
llvm::GCNSubtarget::hasDPPWavefrontShifts
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:865
llvm::GCNSubtarget::NegativeUnalignedScratchOffsetBug
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:112
llvm::GCNSubtarget::SGPRInitBug
bool SGPRInitBug
Definition: GCNSubtarget.h:109
llvm::GCNSubtarget::HasVOP3Literal
bool HasVOP3Literal
Definition: GCNSubtarget.h:165
llvm::GCNSubtarget::hasDPP8
bool hasDPP8() const
Definition: GCNSubtarget.h:869
llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:725
llvm::GCNSubtarget::hasScalarPackInsts
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:394
llvm::GCNSubtarget::hasD16LoadStore
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:620
llvm::GCNSubtarget::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: GCNSubtarget.h:1269
llvm::GCNSubtarget::adjustSchedDependency
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
Definition: AMDGPUSubtarget.cpp:802
llvm::GCNSubtarget::hasDot4Insts
bool hasDot4Insts() const
Definition: GCNSubtarget.h:709
llvm::GCNSubtarget::hasNoSdstCMPX
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:753
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:60
llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1146
llvm::GCNSubtarget::getPostRAMutations
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation >> &Mutations) const override
Definition: AMDGPUSubtarget.cpp:946
llvm::GCNSubtarget::hasFlatScratchSTMode
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:585
llvm::GCNSubtarget::hasFlatAddressSpace
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:562
llvm::GCNSubtarget::supportsBackOffBarrier
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:514
llvm::GCNSubtarget::FlatForGlobal
bool FlatForGlobal
Definition: GCNSubtarget.h:72
llvm::GCNSubtarget::HasNSAEncoding
bool HasNSAEncoding
Definition: GCNSubtarget.h:134
llvm::GCNSubtarget::getReservedNumSGPRs
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
Definition: AMDGPUSubtarget.cpp:654
llvm::GCNSubtarget::GFX10Insts
bool GFX10Insts
Definition: GCNSubtarget.h:105
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::GCNSubtarget::hasAtomicFaddRtnInsts
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:745
llvm::GCNSubtarget::getVGPRAllocGranule
unsigned getVGPRAllocGranule() const
Definition: GCNSubtarget.h:1195
llvm::GCNSubtarget::hasNSAtoVMEMBug
bool hasNSAtoVMEMBug() const
Definition: GCNSubtarget.h:1037
llvm::GCNSubtarget::HasMAIInsts
bool HasMAIInsts
Definition: GCNSubtarget.h:147
llvm::AMDGPUSubtarget::INVALID
@ INVALID
Definition: AMDGPUSubtarget.h:32
llvm::GCNSubtarget::enableSubRegLiveness
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:801
llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used without restricting the occupancy to lower than WaveCount.
AMDGPUGenSubtargetInfo
llvm::GCNSubtarget::setScalarizeGlobalBehavior
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:805
llvm::GCNSubtarget::GFX9Insts
bool GFX9Insts
Definition: GCNSubtarget.h:102
llvm::GCNSubtarget::HasDot1Insts
bool HasDot1Insts
Definition: GCNSubtarget.h:139
llvm::GCNSubtarget::HasDPP
bool HasDPP
Definition: GCNSubtarget.h:125
llvm::GCNSubtarget::hasUserSGPRInit16Bug
bool hasUserSGPRInit16Bug() const
Definition: GCNSubtarget.h:947
llvm::GCNSubtarget::hasFullRate64Ops
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:324
llvm::GCNSubtarget::HasNoSdstCMPX
bool HasNoSdstCMPX
Definition: GCNSubtarget.h:160
llvm::GCNSubtarget::hasDot8Insts
bool hasDot8Insts() const
Definition: GCNSubtarget.h:725
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::GCNSubtarget::hasUnpackedD16VMem
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:660
llvm::GCNSubtarget::flatScratchIsArchitected
bool flatScratchIsArchitected() const
Definition: GCNSubtarget.h:1113
llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:251
llvm::GCNSubtarget::EnableSRAMECC
bool EnableSRAMECC
Definition: GCNSubtarget.h:158
llvm::GCNSubtarget::getMinWavesPerEU
unsigned getMinWavesPerEU() const override
Definition: GCNSubtarget.h:1298
llvm::GCNSubtarget::HasDot5Insts
bool HasDot5Insts
Definition: GCNSubtarget.h:143
llvm::GCNSubtarget::useAA
bool useAA() const override
Definition: AMDGPUSubtarget.cpp:600
llvm::GCNSubtarget::hasDPP
bool hasDPP() const
Definition: GCNSubtarget.h:857
llvm::GCNSubtarget::HasInstFwdPrefetchBug
bool HasInstFwdPrefetchBug
Definition: GCNSubtarget.h:186
llvm::SITargetLowering
Definition: SIISelLowering.h:31
llvm::GCNSubtarget::HasSMEMtoVectorWriteHazard
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:185
llvm::GCNSubtarget::hasG16
bool hasG16() const
Definition: GCNSubtarget.h:903
llvm::GCNSubtarget::hasHardClauses
bool hasHardClauses() const
Definition: GCNSubtarget.h:1043
llvm::GCNSubtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:358
llvm::GCNSubtarget::HasFP8Insts
bool HasFP8Insts
Definition: GCNSubtarget.h:148
llvm::GCNSubtarget::haveRoundOpsF64
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:492
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:634
llvm::SIInstrInfo::getRegisterInfo
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:179
llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:819
llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:879
llvm::GCNSubtarget::HasDPP8
bool HasDPP8
Definition: GCNSubtarget.h:126
llvm::GCNSubtarget::zeroesHigh16BitsOfDest
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns true if this instruction, which produces a 16-bit result in a 32-bit register, implicitly zeroes the high 16 bits of the destination rather than preserving the original value.
Definition: AMDGPUSubtarget.cpp:208
llvm::GCNSubtarget::EnableDS128
bool EnableDS128
Definition: GCNSubtarget.h:92
llvm::GCNSubtarget::HasMFMAInlineLiteralBug
bool HasMFMAInlineLiteralBug
Definition: GCNSubtarget.h:177
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:221
llvm::GCNSubtarget::hasOnlyRevVALUShifts
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:338
llvm::GCNSubtarget::UnalignedScratchAccess
bool UnalignedScratchAccess
Definition: GCNSubtarget.h:75
llvm::GCNSubtarget::TrapID::LLVMAMDHSATrap
@ LLVMAMDHSATrap
llvm::GCNSubtarget::DumpCode
bool DumpCode
Definition: GCNSubtarget.h:94
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:266
llvm::GCNSubtarget::TrapHandlerAbi::AMDHSA
@ AMDHSA
llvm::GCNSubtarget::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs() const
Definition: GCNSubtarget.h:1210
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
llvm::GCNSubtarget::hasLegacyGeometry
bool hasLegacyGeometry() const
Definition: GCNSubtarget.h:1122
llvm::GCNSubtarget::enableFlatScratch
bool enableFlatScratch() const
Definition: GCNSubtarget.h:595
llvm::GCNSubtarget::hasSwap
bool hasSwap() const
Definition: GCNSubtarget.h:390
llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:550
llvm::GCNSubtarget::FlatScratchInsts
bool FlatScratchInsts
Definition: GCNSubtarget.h:170
AMDGPUCallLowering.h
llvm::GCNSubtarget::HasMovrel
bool HasMovrel
Definition: GCNSubtarget.h:116
llvm::GCNSubtarget::HasDot7Insts
bool HasDot7Insts
Definition: GCNSubtarget.h:145
SelectionDAGTargetInfo.h
llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:558
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:184
llvm::GCNSubtarget::hasMergedShaders
bool hasMergedShaders() const
Definition: GCNSubtarget.h:1117
llvm::GCNSubtarget::FP64
bool FP64
Definition: GCNSubtarget.h:97
llvm::GCNSubtarget::getBaseReservedNumSGPRs
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
Definition: AMDGPUSubtarget.cpp:638
llvm::GCNSubtarget::getNSAThreshold
unsigned getNSAThreshold(const MachineFunction &MF) const
Definition: AMDGPUSubtarget.cpp:957
llvm::GCNSubtarget::hasFFBL
bool hasFFBL() const
Definition: GCNSubtarget.h:362
llvm::GCNSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:256
llvm::GCNSubtarget::hasApertureRegs
bool hasApertureRegs() const
Definition: GCNSubtarget.h:542
llvm::GCNSubtarget::~GCNSubtarget
~GCNSubtarget() override
llvm::GCNSubtarget::getSGPREncodingGranule
unsigned getSGPREncodingGranule() const
Definition: GCNSubtarget.h:1130
llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:715
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:624
llvm::GCNSubtarget::enableSIScheduler
bool enableSIScheduler() const
Definition: GCNSubtarget.h:935
llvm::AMDGPU::IsaInfo::getMinNumVGPRs
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:885
llvm::GCNSubtarget::getInstrItineraryData
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:260
llvm::GCNSubtarget::CIInsts
bool CIInsts
Definition: GCNSubtarget.h:100
llvm::GCNSubtarget::HasVscnt
bool HasVscnt
Definition: GCNSubtarget.h:161
llvm::GCNSubtarget::FastFMAF32
bool FastFMAF32
Definition: GCNSubtarget.h:66
llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:781
llvm::GCNSubtarget::hasDelayAlu
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
Definition: GCNSubtarget.h:1082
llvm::GCNSubtarget::hasReadM0SendMsgHazard
bool hasReadM0SendMsgHazard() const
Definition: GCNSubtarget.h:974
llvm::GCNSubtarget::Has64BitDPP
bool Has64BitDPP
Definition: GCNSubtarget.h:127
llvm::GCNSubtarget::hasMIMG_R128
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:308
llvm::LegalizerInfo
Definition: LegalizerInfo.h:1182
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumVGPRs
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
Definition: AMDGPUMetadata.h:260
llvm::GCNSubtarget::EnableFlatScratch
bool EnableFlatScratch
Definition: GCNSubtarget.h:173
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::GCNSubtarget::hasSMEMtoVectorWriteHazard
bool hasSMEMtoVectorWriteHazard() const
Definition: GCNSubtarget.h:995
llvm::GCNSubtarget::HasApertureRegs
bool HasApertureRegs
Definition: GCNSubtarget.h:77
llvm::GCNSubtarget::hasMin3Max3_16
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:374
llvm::GCNSubtarget::HasSMemTimeInst
bool HasSMemTimeInst
Definition: GCNSubtarget.h:163
llvm::AMDGPUSubtarget::Generation
Generation
Definition: AMDGPUSubtarget.h:31
llvm::GCNSubtarget::createFillMFMAShadowMutation
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
Definition: AMDGPUSubtarget.cpp:952
llvm::GCNSubtarget::HasAtomicFaddRtnInsts
bool HasAtomicFaddRtnInsts
Definition: GCNSubtarget.h:150
llvm::MachineSchedPolicy
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Definition: MachineScheduler.h:181
llvm::GCNSubtarget::TargetTriple
Triple TargetTriple
Definition: GCNSubtarget.h:58
llvm::GCNSubtarget::HasGFX11FullVGPRs
bool HasGFX11FullVGPRs
Definition: GCNSubtarget.h:195
llvm::GCNSubtarget::HasSDWAMac
bool HasSDWAMac
Definition: GCNSubtarget.h:123
llvm::GCNSubtarget::hasHWFP64
bool hasHWFP64() const
Definition: GCNSubtarget.h:312
llvm::GCNSubtarget::getTotalNumSGPRs
unsigned getTotalNumSGPRs() const
Definition: GCNSubtarget.h:1135
llvm::GCNSubtarget::has12DWordStoreHazard
bool has12DWordStoreHazard() const
Definition: GCNSubtarget.h:961
llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition: AMDGPUSubtarget.h:204
llvm::GCNSubtarget::hasVcmpxPermlaneHazard
bool hasVcmpxPermlaneHazard() const
Definition: GCNSubtarget.h:987
llvm::GCNSubtarget::hasSDWAOutModsVOPC
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:689
llvm::CallLowering
Definition: CallLowering.h:44
llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:667
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:413
llvm::GCNSubtarget::hasMad64_32
bool hasMad64_32() const
Definition: GCNSubtarget.h:669
llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:838
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::GCNSubtarget::getBaseMaxNumVGPRs
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
Definition: AMDGPUSubtarget.cpp:762
llvm::GCNSubtarget::FlatInstOffsets
bool FlatInstOffsets
Definition: GCNSubtarget.h:168
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:729
llvm::GCNSubtarget::HasG16
bool HasG16
Definition: GCNSubtarget.h:133
llvm::GCNSubtarget::hasUsableDivScaleConditionOutput
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:422
llvm::GCNSubtarget::hasLshlAddB64
bool hasLshlAddB64() const
Definition: GCNSubtarget.h:933
llvm::GCNSubtarget::hasAtomicCSub
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:604
llvm::GCNSubtarget::HasGFX10A16
bool HasGFX10A16
Definition: GCNSubtarget.h:132
llvm::GCNSubtarget::HasR128A16
bool HasR128A16
Definition: GCNSubtarget.h:131
llvm::GCNSubtarget::getCallLowering
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:231
llvm::GCNSubtarget::hasReadM0LdsDirectHazard
bool hasReadM0LdsDirectHazard() const
Definition: GCNSubtarget.h:983