LLVM  13.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMD GCN specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16 
17 #include "AMDGPUCallLowering.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIFrameLowering.h"
20 #include "SIISelLowering.h"
21 #include "SIInstrInfo.h"
23 
24 namespace llvm {
25 
26 class MCInst;
27 class MCInstrInfo;
28 
29 } // namespace llvm
30 
31 #define GET_SUBTARGETINFO_HEADER
32 #include "AMDGPUGenSubtargetInfo.inc"
33 
34 namespace llvm {
35 
36 class GCNTargetMachine;
37 
38 class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
39  public AMDGPUSubtarget {
40 
42 
43 public:
44  // Following 2 enums are documented at:
45  // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
46  enum class TrapHandlerAbi {
47  NONE = 0x00,
48  AMDHSA = 0x01,
49  };
50 
51  enum class TrapID {
52  LLVMAMDHSATrap = 0x02,
53  LLVMAMDHSADebugTrap = 0x03,
54  };
55 
56 private:
57  /// GlobalISel related APIs.
58  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
59  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
60  std::unique_ptr<InstructionSelector> InstSelector;
61  std::unique_ptr<LegalizerInfo> Legalizer;
62  std::unique_ptr<RegisterBankInfo> RegBankInfo;
63 
64 protected:
65  // Basic subtarget description.
68  unsigned Gen;
72 
73  // Possibly statically set by tablegen, but may want to be overridden.
74  bool FastFMAF32;
78 
79  // Dynamically set bits that enable features.
86 
87  // This should not be used directly. 'TargetID' tracks the dynamic settings
88  // for XNACK.
90 
94 
95  // Used as options.
101  bool DumpCode;
102 
103  // Subtarget properties statically set by tablegen
104  bool FP64;
105  bool FMA;
106  bool MIMG_R128;
107  bool IsGCN;
108  bool CIInsts;
109  bool GFX8Insts;
110  bool GFX9Insts;
121  bool HasMovrel;
130  bool HasDPP;
131  bool HasDPP8;
137  bool HasG16;
152 
153  // This should not be used directly. 'TargetID' tracks the dynamic settings
154  // for SRAMECC.
156 
158  bool HasVscnt;
173  bool CaymanISA;
174  bool CFALUBug;
183 
195 
196  // Dummy feature to use for assembler in tablegen.
198 
200 private:
201  SIInstrInfo InstrInfo;
202  SITargetLowering TLInfo;
203  SIFrameLowering FrameLowering;
204 
205 public:
206  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
207  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
208 
209  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
210  const GCNTargetMachine &TM);
211  ~GCNSubtarget() override;
212 
214  StringRef GPU, StringRef FS);
215 
216  const SIInstrInfo *getInstrInfo() const override {
217  return &InstrInfo;
218  }
219 
220  const SIFrameLowering *getFrameLowering() const override {
221  return &FrameLowering;
222  }
223 
224  const SITargetLowering *getTargetLowering() const override {
225  return &TLInfo;
226  }
227 
228  const SIRegisterInfo *getRegisterInfo() const override {
229  return &InstrInfo.getRegisterInfo();
230  }
231 
232  const CallLowering *getCallLowering() const override {
233  return CallLoweringInfo.get();
234  }
235 
236  const InlineAsmLowering *getInlineAsmLowering() const override {
237  return InlineAsmLoweringInfo.get();
238  }
239 
241  return InstSelector.get();
242  }
243 
244  const LegalizerInfo *getLegalizerInfo() const override {
245  return Legalizer.get();
246  }
247 
248  const RegisterBankInfo *getRegBankInfo() const override {
249  return RegBankInfo.get();
250  }
251 
253  return TargetID;
254  }
255 
256  // Nothing implemented, just prevent crashes on use.
257  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
258  return &TSInfo;
259  }
260 
261  const InstrItineraryData *getInstrItineraryData() const override {
262  return &InstrItins;
263  }
264 
266 
268  return (Generation)Gen;
269  }
270 
271  /// Return the number of high bits known to be zero for a frame index.
274  }
275 
276  int getLDSBankCount() const {
277  return LDSBankCount;
278  }
279 
280  unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
281  return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
282  }
283 
284  unsigned getConstantBusLimit(unsigned Opcode) const;
285 
286  bool hasIntClamp() const {
287  return HasIntClamp;
288  }
289 
290  bool hasFP64() const {
291  return FP64;
292  }
293 
294  bool hasMIMG_R128() const {
295  return MIMG_R128;
296  }
297 
298  bool hasHWFP64() const {
299  return FP64;
300  }
301 
302  bool hasFastFMAF32() const {
303  return FastFMAF32;
304  }
305 
306  bool hasHalfRate64Ops() const {
307  return HalfRate64Ops;
308  }
309 
310  bool hasFullRate64Ops() const {
311  return FullRate64Ops;
312  }
313 
314  bool hasAddr64() const {
316  }
317 
318  bool hasFlat() const {
320  }
321 
322  // Return true if the target only has the reverse operand versions of VALU
323  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
324  bool hasOnlyRevVALUShifts() const {
325  return getGeneration() >= VOLCANIC_ISLANDS;
326  }
327 
328  bool hasFractBug() const {
329  return getGeneration() == SOUTHERN_ISLANDS;
330  }
331 
332  bool hasBFE() const {
333  return true;
334  }
335 
336  bool hasBFI() const {
337  return true;
338  }
339 
340  bool hasBFM() const {
341  return hasBFE();
342  }
343 
344  bool hasBCNT(unsigned Size) const {
345  return true;
346  }
347 
348  bool hasFFBL() const {
349  return true;
350  }
351 
352  bool hasFFBH() const {
353  return true;
354  }
355 
356  bool hasMed3_16() const {
358  }
359 
360  bool hasMin3Max3_16() const {
362  }
363 
364  bool hasFmaMixInsts() const {
365  return HasFmaMixInsts;
366  }
367 
368  bool hasCARRY() const {
369  return true;
370  }
371 
372  bool hasFMA() const {
373  return FMA;
374  }
375 
376  bool hasSwap() const {
377  return GFX9Insts;
378  }
379 
380  bool hasScalarPackInsts() const {
381  return GFX9Insts;
382  }
383 
384  bool hasScalarMulHiInsts() const {
385  return GFX9Insts;
386  }
387 
390  }
391 
392  bool supportsGetDoorbellID() const {
393  // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
394  return getGeneration() >= GFX9;
395  }
396 
397  /// True if the offset field of DS instructions works as expected. On SI, the
398  /// offset uses a 16-bit adder and does not always wrap properly.
399  bool hasUsableDSOffset() const {
400  return getGeneration() >= SEA_ISLANDS;
401  }
402 
405  }
406 
407  /// Condition output from div_scale is usable.
409  return getGeneration() != SOUTHERN_ISLANDS;
410  }
411 
412  /// Extra wait hazard is needed in some cases before
413  /// s_cbranch_vccnz/s_cbranch_vccz.
414  bool hasReadVCCZBug() const {
415  return getGeneration() <= SEA_ISLANDS;
416  }
417 
418  /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
420  return getGeneration() >= GFX10;
421  }
422 
423  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
424  /// was written by a VALU instruction.
426  return getGeneration() == SOUTHERN_ISLANDS;
427  }
428 
429  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
430  /// SGPR was written by a VALU Instruction.
432  return getGeneration() >= VOLCANIC_ISLANDS;
433  }
434 
435  bool hasRFEHazards() const {
436  return getGeneration() >= VOLCANIC_ISLANDS;
437  }
438 
439  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
440  unsigned getSetRegWaitStates() const {
441  return getGeneration() <= SEA_ISLANDS ? 1 : 2;
442  }
443 
444  bool dumpCode() const {
445  return DumpCode;
446  }
447 
448  /// Return the amount of LDS that can be used that will not restrict the
449  /// occupancy lower than WaveCount.
450  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
451  const Function &) const;
452 
455  }
456 
457  /// \returns If target supports S_DENORM_MODE.
458  bool hasDenormModeInst() const {
460  }
461 
462  bool useFlatForGlobal() const {
463  return FlatForGlobal;
464  }
465 
466  /// \returns If target supports ds_read/write_b128 and user enables generation
467  /// of ds_read/write_b128.
468  bool useDS128() const {
469  return CIInsts && EnableDS128;
470  }
471 
472  /// \return If target supports ds_read/write_b96/128.
473  bool hasDS96AndDS128() const {
474  return CIInsts;
475  }
476 
477  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
478  bool haveRoundOpsF64() const {
479  return CIInsts;
480  }
481 
482  /// \returns If MUBUF instructions always perform range checking, even for
483  /// buffer resources used for private memory access.
486  }
487 
488  /// \returns If target requires PRT Strict NULL support (zero result registers
489  /// for sparse texture support).
490  bool usePRTStrictNull() const {
491  return EnablePRTStrictNull;
492  }
493 
496  }
497 
499  return UnalignedBufferAccess;
500  }
501 
504  }
505 
506  bool hasUnalignedDSAccess() const {
507  return UnalignedDSAccess;
508  }
509 
512  }
513 
515  return UnalignedScratchAccess;
516  }
517 
518  bool hasUnalignedAccessMode() const {
519  return UnalignedAccessMode;
520  }
521 
522  bool hasApertureRegs() const {
523  return HasApertureRegs;
524  }
525 
526  bool isTrapHandlerEnabled() const {
527  return TrapHandler;
528  }
529 
530  bool isXNACKEnabled() const {
531  return TargetID.isXnackOnOrAny();
532  }
533 
534  bool isTgSplitEnabled() const {
535  return EnableTgSplit;
536  }
537 
538  bool isCuModeEnabled() const {
539  return EnableCuMode;
540  }
541 
542  bool hasFlatAddressSpace() const {
543  return FlatAddressSpace;
544  }
545 
546  bool hasFlatScrRegister() const {
547  return hasFlatAddressSpace();
548  }
549 
550  bool hasFlatInstOffsets() const {
551  return FlatInstOffsets;
552  }
553 
554  bool hasFlatGlobalInsts() const {
555  return FlatGlobalInsts;
556  }
557 
558  bool hasFlatScratchInsts() const {
559  return FlatScratchInsts;
560  }
561 
562  // Check if target supports ST addressing mode with FLAT scratch instructions.
563  // The ST addressing mode means no registers are used, either VGPR or SGPR,
564  // but only immediate offset is swizzled and added to the FLAT scratch base.
565  bool hasFlatScratchSTMode() const {
566  return hasFlatScratchInsts() && hasGFX10_3Insts();
567  }
568 
570  return ScalarFlatScratchInsts;
571  }
572 
573  bool hasGlobalAddTidInsts() const {
574  return GFX10_BEncoding;
575  }
576 
577  bool hasAtomicCSub() const {
578  return GFX10_BEncoding;
579  }
580 
582  return getGeneration() >= GFX9;
583  }
584 
585  bool hasFlatSegmentOffsetBug() const {
587  }
588 
590  return getGeneration() > GFX9;
591  }
592 
593  bool hasD16LoadStore() const {
594  return getGeneration() >= GFX9;
595  }
596 
597  bool d16PreservesUnusedBits() const {
599  }
600 
601  bool hasD16Images() const {
602  return getGeneration() >= VOLCANIC_ISLANDS;
603  }
604 
605  /// Return if most LDS instructions have an m0 use that requires m0 to be
606  /// initialized.
607  bool ldsRequiresM0Init() const {
608  return getGeneration() < GFX9;
609  }
610 
611  // True if the hardware rewinds and replays GWS operations if a wave is
612  // preempted.
613  //
614  // If this is false, a GWS operation requires testing if a nack set the
615  // MEM_VIOL bit, and repeating if so.
616  bool hasGWSAutoReplay() const {
617  return getGeneration() >= GFX9;
618  }
619 
620  /// \returns if target has ds_gws_sema_release_all instruction.
621  bool hasGWSSemaReleaseAll() const {
622  return CIInsts;
623  }
624 
625  /// \returns true if the target has integer add/sub instructions that do not
626  /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
627  /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
628  /// for saturation.
629  bool hasAddNoCarry() const {
630  return AddNoCarryInsts;
631  }
632 
633  bool hasUnpackedD16VMem() const {
634  return HasUnpackedD16VMem;
635  }
636 
637  // Covers VS/PS/CS graphics shaders
638  bool isMesaGfxShader(const Function &F) const {
639  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
640  }
641 
642  bool hasMad64_32() const {
643  return getGeneration() >= SEA_ISLANDS;
644  }
645 
646  bool hasSDWAOmod() const {
647  return HasSDWAOmod;
648  }
649 
650  bool hasSDWAScalar() const {
651  return HasSDWAScalar;
652  }
653 
654  bool hasSDWASdst() const {
655  return HasSDWASdst;
656  }
657 
658  bool hasSDWAMac() const {
659  return HasSDWAMac;
660  }
661 
662  bool hasSDWAOutModsVOPC() const {
663  return HasSDWAOutModsVOPC;
664  }
665 
666  bool hasDLInsts() const {
667  return HasDLInsts;
668  }
669 
670  bool hasDot1Insts() const {
671  return HasDot1Insts;
672  }
673 
674  bool hasDot2Insts() const {
675  return HasDot2Insts;
676  }
677 
678  bool hasDot3Insts() const {
679  return HasDot3Insts;
680  }
681 
682  bool hasDot4Insts() const {
683  return HasDot4Insts;
684  }
685 
686  bool hasDot5Insts() const {
687  return HasDot5Insts;
688  }
689 
690  bool hasDot6Insts() const {
691  return HasDot6Insts;
692  }
693 
694  bool hasDot7Insts() const {
695  return HasDot7Insts;
696  }
697 
698  bool hasMAIInsts() const {
699  return HasMAIInsts;
700  }
701 
702  bool hasPkFmacF16Inst() const {
703  return HasPkFmacF16Inst;
704  }
705 
706  bool hasAtomicFaddInsts() const {
707  return HasAtomicFaddInsts;
708  }
709 
710  bool hasNoSdstCMPX() const {
711  return HasNoSdstCMPX;
712  }
713 
714  bool hasVscnt() const {
715  return HasVscnt;
716  }
717 
718  bool hasGetWaveIdInst() const {
719  return HasGetWaveIdInst;
720  }
721 
722  bool hasSMemTimeInst() const {
723  return HasSMemTimeInst;
724  }
725 
726  bool hasShaderCyclesRegister() const {
728  }
729 
730  bool hasRegisterBanking() const {
731  return HasRegisterBanking;
732  }
733 
734  bool hasVOP3Literal() const {
735  return HasVOP3Literal;
736  }
737 
738  bool hasNoDataDepHazard() const {
739  return HasNoDataDepHazard;
740  }
741 
743  return getGeneration() < SEA_ISLANDS;
744  }
745 
746  // Scratch is allocated in 256 dword per wave blocks for the entire
747  // wavefront. When viewed from the perspective of an arbitrary workitem, this
748  // is 4-byte aligned.
749  //
750  // Only 4-byte alignment is really needed to access anything. Transformations
751  // on the pointer value itself may rely on the alignment / known low bits of
752  // the pointer. Set this to something above the minimum to avoid needing
753  // dynamic realignment in common cases.
754  Align getStackAlignment() const { return Align(16); }
755 
756  bool enableMachineScheduler() const override {
757  return true;
758  }
759 
760  bool useAA() const override;
761 
762  bool enableSubRegLiveness() const override {
763  return true;
764  }
765 
768 
769  // static wrappers
770  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
771 
772  // XXX - Why is this here if it isn't in the default pass set?
773  bool enableEarlyIfConversion() const override {
774  return true;
775  }
776 
777  bool enableFlatScratch() const;
778 
780  unsigned NumRegionInstrs) const override;
781 
782  unsigned getMaxNumUserSGPRs() const {
783  return 16;
784  }
785 
786  bool hasSMemRealTime() const {
787  return HasSMemRealTime;
788  }
789 
790  bool hasMovrel() const {
791  return HasMovrel;
792  }
793 
794  bool hasVGPRIndexMode() const {
795  return HasVGPRIndexMode;
796  }
797 
798  bool useVGPRIndexMode() const;
799 
800  bool hasScalarCompareEq64() const {
801  return getGeneration() >= VOLCANIC_ISLANDS;
802  }
803 
804  bool hasScalarStores() const {
805  return HasScalarStores;
806  }
807 
808  bool hasScalarAtomics() const {
809  return HasScalarAtomics;
810  }
811 
812  bool hasLDSFPAtomics() const {
813  return GFX8Insts;
814  }
815 
816  /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
817  bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
818 
819  bool hasDPP() const {
820  return HasDPP;
821  }
822 
823  bool hasDPPBroadcasts() const {
824  return HasDPP && getGeneration() < GFX10;
825  }
826 
827  bool hasDPPWavefrontShifts() const {
828  return HasDPP && getGeneration() < GFX10;
829  }
830 
831  bool hasDPP8() const {
832  return HasDPP8;
833  }
834 
835  bool has64BitDPP() const {
836  return Has64BitDPP;
837  }
838 
839  bool hasPackedFP32Ops() const {
840  return HasPackedFP32Ops;
841  }
842 
843  bool hasFmaakFmamkF32Insts() const {
844  return getGeneration() >= GFX10;
845  }
846 
847  bool hasExtendedImageInsts() const {
848  return HasExtendedImageInsts;
849  }
850 
851  bool hasR128A16() const {
852  return HasR128A16;
853  }
854 
855  bool hasGFX10A16() const {
856  return HasGFX10A16;
857  }
858 
859  bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
860 
861  bool hasG16() const { return HasG16; }
862 
863  bool hasOffset3fBug() const {
864  return HasOffset3fBug;
865  }
866 
867  bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
868 
870 
871  bool hasNSAEncoding() const { return HasNSAEncoding; }
872 
873  bool hasGFX10_BEncoding() const {
874  return GFX10_BEncoding;
875  }
876 
877  bool hasGFX10_3Insts() const {
878  return GFX10_3Insts;
879  }
880 
881  bool hasMadF16() const;
882 
883  bool enableSIScheduler() const {
884  return EnableSIScheduler;
885  }
886 
887  bool loadStoreOptEnabled() const {
888  return EnableLoadStoreOpt;
889  }
890 
891  bool hasSGPRInitBug() const {
892  return SGPRInitBug;
893  }
894 
896 
899  }
900 
901  bool hasMFMAInlineLiteralBug() const {
903  }
904 
905  bool has12DWordStoreHazard() const {
907  }
908 
909  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
910  bool hasDwordx3LoadStores() const {
911  return CIInsts;
912  }
913 
916  }
917 
918  bool hasReadM0SendMsgHazard() const {
921  }
922 
923  bool hasVcmpxPermlaneHazard() const {
924  return HasVcmpxPermlaneHazard;
925  }
926 
929  }
930 
933  }
934 
935  bool hasLDSMisalignedBug() const {
936  return LDSMisalignedBug && !EnableCuMode;
937  }
938 
939  bool hasInstFwdPrefetchBug() const {
940  return HasInstFwdPrefetchBug;
941  }
942 
943  bool hasVcmpxExecWARHazard() const {
944  return HasVcmpxExecWARHazard;
945  }
946 
949  }
950 
951  bool hasNSAtoVMEMBug() const {
952  return HasNSAtoVMEMBug;
953  }
954 
955  bool hasHardClauses() const { return getGeneration() >= GFX10; }
956 
957  bool hasGFX90AInsts() const { return GFX90AInsts; }
958 
959  /// Return if operations acting on VGPR tuples require even alignment.
960  bool needsAlignedVGPRs() const { return GFX90AInsts; }
961 
962  bool hasPackedTID() const { return HasPackedTID; }
963 
964  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
965  /// SGPRs
966  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
967 
968  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
969  /// VGPRs
970  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
971 
972  /// Return occupancy for the given function. Takes the LDS size and the
973  /// number of registers into account if provided.
974  /// Note, occupancy can be affected by the scratch allocation as well, but
975  /// we do not have enough information to compute it.
976  unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
977  unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
978 
979  /// \returns true if the flat_scratch register should be initialized with the
980  /// pointer to the wave's scratch memory rather than a size and offset.
981  bool flatScratchIsPointer() const {
983  }
984 
985  /// \returns true if the machine has merged shaders in which s0-s7 are
986  /// reserved by the hardware and user SGPRs start at s8
987  bool hasMergedShaders() const {
988  return getGeneration() >= GFX9;
989  }
990 
991  /// \returns SGPR allocation granularity supported by the subtarget.
992  unsigned getSGPRAllocGranule() const {
994  }
995 
996  /// \returns SGPR encoding granularity supported by the subtarget.
997  unsigned getSGPREncodingGranule() const {
999  }
1000 
1001  /// \returns Total number of SGPRs supported by the subtarget.
1002  unsigned getTotalNumSGPRs() const {
1003  return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1004  }
1005 
1006  /// \returns Addressable number of SGPRs supported by the subtarget.
1007  unsigned getAddressableNumSGPRs() const {
1009  }
1010 
1011  /// \returns Minimum number of SGPRs that meets the given number of waves per
1012  /// execution unit requirement supported by the subtarget.
1013  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1014  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1015  }
1016 
1017  /// \returns Maximum number of SGPRs that meets the given number of waves per
1018  /// execution unit requirement supported by the subtarget.
1019  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1020  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1021  }
1022 
1023  /// \returns Reserved number of SGPRs for given function \p MF.
1024  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1025 
1026  /// \returns Maximum number of SGPRs that meets number of waves per execution
1027  /// unit requirement for function \p MF, or number of SGPRs explicitly
1028  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1029  ///
1030  /// \returns Value that meets number of waves per execution unit requirement
1031  /// if explicitly requested value cannot be converted to integer, violates
1032  /// subtarget's specifications, or does not meet number of waves per execution
1033  /// unit requirement.
1034  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1035 
1036  /// \returns VGPR allocation granularity supported by the subtarget.
1037  unsigned getVGPRAllocGranule() const {
1039  }
1040 
1041  /// \returns VGPR encoding granularity supported by the subtarget.
1042  unsigned getVGPREncodingGranule() const {
1044  }
1045 
1046  /// \returns Total number of VGPRs supported by the subtarget.
1047  unsigned getTotalNumVGPRs() const {
1048  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1049  }
1050 
1051  /// \returns Addressable number of VGPRs supported by the subtarget.
1052  unsigned getAddressableNumVGPRs() const {
1054  }
1055 
1056  /// \returns Minimum number of VGPRs that meets given number of waves per
1057  /// execution unit requirement supported by the subtarget.
1058  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1059  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1060  }
1061 
1062  /// \returns Maximum number of VGPRs that meets given number of waves per
1063  /// execution unit requirement supported by the subtarget.
1064  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1065  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1066  }
1067 
1068  /// \returns Maximum number of VGPRs that meets number of waves per execution
1069  /// unit requirement for function \p MF, or number of VGPRs explicitly
1070  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1071  ///
1072  /// \returns Value that meets number of waves per execution unit requirement
1073  /// if explicitly requested value cannot be converted to integer, violates
1074  /// subtarget's specifications, or does not meet number of waves per execution
1075  /// unit requirement.
1076  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1077 
1078  void getPostRAMutations(
1079  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1080  const override;
1081 
1082  bool isWave32() const {
1083  return getWavefrontSize() == 32;
1084  }
1085 
1086  bool isWave64() const {
1087  return getWavefrontSize() == 64;
1088  }
1089 
1091  return getRegisterInfo()->getBoolRC();
1092  }
1093 
1094  /// \returns Maximum number of work groups per compute unit supported by the
1095  /// subtarget and limited by given \p FlatWorkGroupSize.
1096  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1097  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1098  }
1099 
1100  /// \returns Minimum flat work group size supported by the subtarget.
1101  unsigned getMinFlatWorkGroupSize() const override {
1103  }
1104 
1105  /// \returns Maximum flat work group size supported by the subtarget.
1106  unsigned getMaxFlatWorkGroupSize() const override {
1108  }
1109 
1110  /// \returns Number of waves per execution unit required to support the given
1111  /// \p FlatWorkGroupSize.
1112  unsigned
1113  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1114  return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1115  }
1116 
1117  /// \returns Minimum number of waves per execution unit supported by the
1118  /// subtarget.
1119  unsigned getMinWavesPerEU() const override {
1120  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1121  }
1122 
1123  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1124  SDep &Dep) const override;
1125 };
1126 
1127 } // end namespace llvm
1128 
1129 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::GCNSubtarget::hasScalarMulHiInsts
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:384
llvm::GCNSubtarget::HasDot3Insts
bool HasDot3Insts
Definition: GCNSubtarget.h:143
llvm::GCNSubtarget::Gen
unsigned Gen
Definition: GCNSubtarget.h:68
llvm::GCNSubtarget::hasGFX10A16
bool hasGFX10A16() const
Definition: GCNSubtarget.h:855
llvm::GCNSubtarget::hasBFM
bool hasBFM() const
Definition: GCNSubtarget.h:340
llvm::GCNSubtarget::hasDot2Insts
bool hasDot2Insts() const
Definition: GCNSubtarget.h:674
llvm
Definition: AllocatorList.h:23
llvm::AMDGPU::IsaInfo::getSGPRAllocGranule
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:557
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::GCNSubtarget::GFX8Insts
bool GFX8Insts
Definition: GCNSubtarget.h:109
llvm::GCNSubtarget::hasGWSAutoReplay
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:616
llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:589
llvm::GCNSubtarget::HasSDWAScalar
bool HasSDWAScalar
Definition: GCNSubtarget.h:126
llvm::GCNSubtarget::TrapHandlerAbi
TrapHandlerAbi
Definition: GCNSubtarget.h:46
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
llvm::GCNSubtarget::HasGetWaveIdInst
bool HasGetWaveIdInst
Definition: GCNSubtarget.h:159
llvm::GCNSubtarget::getRegBankInfo
const RegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:248
llvm::GCNSubtarget::hasRegisterBanking
bool hasRegisterBanking() const
Definition: GCNSubtarget.h:730
llvm::GCNSubtarget::hasSDWAMac
bool hasSDWAMac() const
Definition: GCNSubtarget.h:658
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumSGPRs
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
Definition: AMDGPUMetadata.h:253
llvm::GCNSubtarget::hasVGPRIndexMode
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:794
llvm::InlineAsmLowering
Definition: InlineAsmLowering.h:28
llvm::GCNSubtarget::hasSDWASdst
bool hasSDWASdst() const
Definition: GCNSubtarget.h:654
llvm::GCNSubtarget::getFrameLowering
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:220
llvm::GCNSubtarget::initializeSubtargetDependencies
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Definition: AMDGPUSubtarget.cpp:81
llvm::GCNSubtarget::hasD16Images
bool hasD16Images() const
Definition: GCNSubtarget.h:601
llvm::GCNSubtarget::EnablePRTStrictNull
bool EnablePRTStrictNull
Definition: GCNSubtarget.h:100
llvm::Function
Definition: Function.h:61
llvm::GCNSubtarget::HasDot2Insts
bool HasDot2Insts
Definition: GCNSubtarget.h:142
llvm::GCNSubtarget::HasPackedFP32Ops
bool HasPackedFP32Ops
Definition: GCNSubtarget.h:133
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isSramEccOnOrAny
bool isSramEccOnOrAny() const
Definition: AMDGPUBaseInfo.h:130
llvm::GCNSubtarget::FlatGlobalInsts
bool FlatGlobalInsts
Definition: GCNSubtarget.h:167
llvm::GCNSubtarget::FlatAddressSpace
bool FlatAddressSpace
Definition: GCNSubtarget.h:165
llvm::GCNSubtarget::HasDLInsts
bool HasDLInsts
Definition: GCNSubtarget.h:140
llvm::GCNSubtarget::hasMovrel
bool hasMovrel() const
Definition: GCNSubtarget.h:790
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:37
llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:431
llvm::GCNSubtarget::TrapHandlerAbi::NONE
@ NONE
llvm::GCNSubtarget::hasPermLaneX16
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:817
llvm::GCNSubtarget::hasShaderCyclesRegister
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:726
llvm::GCNSubtarget::needsAlignedVGPRs
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
Definition: GCNSubtarget.h:960
llvm::GCNSubtarget::hasFlatScratchInsts
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:558
llvm::GCNSubtarget::UnalignedDSAccess
bool UnalignedDSAccess
Definition: GCNSubtarget.h:180
llvm::GCNSubtarget::hasFP64
bool hasFP64() const
Definition: GCNSubtarget.h:290
llvm::GCNSubtarget::InstrItins
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:69
llvm::GCNSubtarget::HasImageStoreD16Bug
bool HasImageStoreD16Bug
Definition: GCNSubtarget.h:193
llvm::GCNSubtarget::hasAutoWaitcntBeforeBarrier
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:494
llvm::GCNSubtarget::supportsMinMaxDenormModes
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:453
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:585
llvm::GCNSubtarget::HasDot4Insts
bool HasDot4Insts
Definition: GCNSubtarget.h:144
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::GCNSubtarget::hasDS96AndDS128
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:473
llvm::GCNSubtarget::HasVcmpxPermlaneHazard
bool HasVcmpxPermlaneHazard
Definition: GCNSubtarget.h:184
llvm::GCNSubtarget::getSetRegWaitStates
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:440
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::GCNSubtarget::HasExtendedImageInsts
bool HasExtendedImageInsts
Definition: GCNSubtarget.h:134
llvm::GCNSubtarget::HasVertexCache
bool HasVertexCache
Definition: GCNSubtarget.h:177
llvm::GCNSubtarget::hasFlatGlobalInsts
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:554
llvm::GCNSubtarget::hasCARRY
bool hasCARRY() const
Definition: GCNSubtarget.h:368
llvm::GCNSubtarget::useDS128
bool useDS128() const
Definition: GCNSubtarget.h:468
llvm::GCNSubtarget::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1096
llvm::GCNSubtarget::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1106
llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:526
llvm::GCNSubtarget::hasDot3Insts
bool hasDot3Insts() const
Definition: GCNSubtarget.h:678
llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:686
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:666
llvm::GCNSubtarget::hasFractBug
bool hasFractBug() const
Definition: GCNSubtarget.h:328
llvm::GCNSubtarget::hasDwordx3LoadStores
bool hasDwordx3LoadStores() const
Definition: GCNSubtarget.h:910
llvm::GCNSubtarget::hasNSAEncoding
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:871
llvm::GCNSubtarget::TrapID::LLVMAMDHSADebugTrap
@ LLVMAMDHSADebugTrap
llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:223
llvm::GCNSubtarget::HasSDWAOutModsVOPC
bool HasSDWAOutModsVOPC
Definition: GCNSubtarget.h:129
llvm::AMDGPU::IsaInfo::getMinWavesPerEU
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:524
llvm::GCNSubtarget::getStackAlignment
Align getStackAlignment() const
Definition: GCNSubtarget.h:754
llvm::GCNSubtarget::hasUnalignedDSAccessEnabled
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:510
llvm::GCNSubtarget::EnableSIScheduler
bool EnableSIScheduler
Definition: GCNSubtarget.h:98
llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:419
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
llvm::GCNSubtarget::HasFlatSegmentOffsetBug
bool HasFlatSegmentOffsetBug
Definition: GCNSubtarget.h:192
llvm::GCNSubtarget::loadStoreOptEnabled
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:887
llvm::GCNSubtarget::TargetID
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:67
llvm::GCNSubtarget::UnalignedBufferAccess
bool UnalignedBufferAccess
Definition: GCNSubtarget.h:179
llvm::GCNSubtarget::TexVTXClauseSize
short TexVTXClauseSize
Definition: GCNSubtarget.h:178
llvm::GCNSubtarget::hasGetWaveIdInst
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:718
llvm::GCNSubtarget::HasNSAtoVMEMBug
bool HasNSAtoVMEMBug
Definition: GCNSubtarget.h:190
llvm::GCNSubtarget::HasScalarAtomics
bool HasScalarAtomics
Definition: GCNSubtarget.h:124
llvm::GCNSubtarget::ScalarFlatScratchInsts
bool ScalarFlatScratchInsts
Definition: GCNSubtarget.h:169
llvm::AMDGPU::IsaInfo::getMaxNumVGPRs
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:711
llvm::X86AS::FS
@ FS
Definition: X86.h:183
llvm::GCNSubtarget::hasPackedFP32Ops
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:839
llvm::GCNSubtarget::HalfRate64Ops
bool HalfRate64Ops
Definition: GCNSubtarget.h:76
llvm::GCNSubtarget::NegativeScratchOffsetBug
bool NegativeScratchOffsetBug
Definition: GCNSubtarget.h:116
llvm::GCNSubtarget::hasScalarCompareEq64
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:800
llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:512
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:228
llvm::GCNSubtarget::HasOffset3fBug
bool HasOffset3fBug
Definition: GCNSubtarget.h:191
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GCNSubtarget::UnalignedAccessMode
bool UnalignedAccessMode
Definition: GCNSubtarget.h:83
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:216
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:403
llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:502
llvm::GCNSubtarget::hasHalfRate64Ops
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:306
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:621
llvm::GCNSubtarget::HasSDWASdst
bool HasSDWASdst
Definition: GCNSubtarget.h:127
llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:566
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:106
llvm::GCNSubtarget::HasNoDataDepHazard
bool HasNoDataDepHazard
Definition: GCNSubtarget.h:164
llvm::GCNSubtarget::HasVGPRIndexMode
bool HasVGPRIndexMode
Definition: GCNSubtarget.h:122
llvm::GCNSubtarget::overrideSchedPolicy
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
Definition: AMDGPUSubtarget.cpp:635
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:85
llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:272
llvm::GCNSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
llvm::GCNSubtarget::hasFlatScrRegister
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:546
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1313
llvm::GCNSubtarget::hasFastFMAF32
bool hasFastFMAF32() const
Definition: GCNSubtarget.h:302
llvm::GCNSubtarget::hasA16
bool hasA16() const
Definition: GCNSubtarget.h:859
llvm::GCNSubtarget::GFX10_3Insts
bool GFX10_3Insts
Definition: GCNSubtarget.h:113
llvm::GCNSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:756
llvm::GCNSubtarget::hasBFI
bool hasBFI() const
Definition: GCNSubtarget.h:336
llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition: GCNSubtarget.h:1019
llvm::GCNSubtarget::useVGPRIndexMode
bool useVGPRIndexMode() const
Definition: AMDGPUSubtarget.cpp:656
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:314
llvm::GCNSubtarget::EnableLoadStoreOpt
bool EnableLoadStoreOpt
Definition: GCNSubtarget.h:96
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:462
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:364
llvm::GCNSubtarget::HasSMemRealTime
bool HasSMemRealTime
Definition: GCNSubtarget.h:118
llvm::Legalizer
Definition: Legalizer.h:31
llvm::GCNSubtarget::hasUnalignedAccessMode
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:518
llvm::GCNSubtarget::hasScalarFlatScratchInsts
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:569
llvm::GCNSubtarget::getTargetLowering
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:224
llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:114
llvm::GCNSubtarget::hasSDWAOmod
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:646
llvm::AMDGPU::IsaInfo::getMaxNumSGPRs
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Definition: AMDGPUBaseInfo.cpp:606
llvm::GCNSubtarget::hasSMRDReadVALUDefHazard
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:425
llvm::GCNSubtarget::getOccupancyWithNumSGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
Definition: AMDGPUSubtarget.cpp:662
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::GCNSubtarget::getInlineAsmLowering
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:236
b
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int b
Definition: README.txt:418
llvm::GCNSubtarget::hasR128A16
bool hasR128A16() const
Definition: GCNSubtarget.h:851
llvm::GCNSubtarget::hasVscnt
bool hasVscnt() const
Definition: GCNSubtarget.h:714
AMDGPUSubtarget.h
llvm::GCNSubtarget::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1101
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isXnackOnOrAny
bool isXnackOnOrAny() const
Definition: AMDGPUBaseInfo.h:101
llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition: SelectionDAGTargetInfo.h:31
llvm::GCNSubtarget::getMaxPrivateElementSize
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:280
llvm::GCNSubtarget::hasVOP3Literal
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:734
llvm::GCNSubtarget::SupportsXNACK
bool SupportsXNACK
Definition: GCNSubtarget.h:85
llvm::GCNSubtarget::CFALUBug
bool CFALUBug
Definition: GCNSubtarget.h:174
llvm::GCNSubtarget::hasFFBH
bool hasFFBH() const
Definition: GCNSubtarget.h:352
llvm::GCNSubtarget::hasSMemTimeInst
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:722
llvm::GCNSubtarget::hasReadVCCZBug
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:414
llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1082
llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:891
llvm::GCNSubtarget::AutoWaitcntBeforeBarrier
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:81
llvm::GCNSubtarget::EnableTgSplit
bool EnableTgSplit
Definition: GCNSubtarget.h:91
llvm::GCNSubtarget::hasSMemRealTime
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:786
llvm::GCNSubtarget::hasMed3_16
bool hasMed3_16() const
Definition: GCNSubtarget.h:356
llvm::GCNSubtarget::getInstructionSelector
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:240
llvm::GCNSubtarget::isMesaGfxShader
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:638
llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:543
llvm::GCNSubtarget::hasGFX10_BEncoding
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:873
llvm::GCNSubtarget::TrapHandler
bool TrapHandler
Definition: GCNSubtarget.h:93
llvm::GCNSubtarget::hasPkFmacF16Inst
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:702
llvm::GCNSubtarget::TrapID
TrapID
Definition: GCNSubtarget.h:51
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:29
llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:534
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::SIFrameLowering
Definition: SIFrameLowering.h:21
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
SIISelLowering.h
llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1064
llvm::GCNSubtarget::hasDot6Insts
bool hasDot6Insts() const
Definition: GCNSubtarget.h:690
llvm::GCNSubtarget::LDSBankCount
int LDSBankCount
Definition: GCNSubtarget.h:70
llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:444
llvm::GCNSubtarget::hasScalarAtomics
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:808
llvm::GCNSubtarget::hasUnalignedDSAccess
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:506
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:767
llvm::GCNSubtarget::MIMG_R128
bool MIMG_R128
Definition: GCNSubtarget.h:106
llvm::GCNSubtarget::hasExtendedImageInsts
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:847
llvm::GCNSubtarget::hasDot1Insts
bool hasDot1Insts() const
Definition: GCNSubtarget.h:670
llvm::GCNSubtarget::hasOffset3fBug
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:863
llvm::GCNSubtarget::hasVcmpxExecWARHazard
bool hasVcmpxExecWARHazard() const
Definition: GCNSubtarget.h:943
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:180
llvm::GCNSubtarget::getVGPREncodingGranule
unsigned getVGPREncodingGranule() const
Definition: GCNSubtarget.h:1042
llvm::GCNSubtarget::isWave64
bool isWave64() const
Definition: GCNSubtarget.h:1086
llvm::GCNSubtarget::HasLdsBranchVmemWARHazard
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:189
llvm::GCNSubtarget::EnableUnsafeDSOffsetFolding
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:97
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::GCNSubtarget::MaxWaveScratchSize
static const unsigned MaxWaveScratchSize
Definition: GCNSubtarget.h:207
llvm::GCNSubtarget::FMA
bool FMA
Definition: GCNSubtarget.h:105
llvm::GCNSubtarget::FullRate64Ops
bool FullRate64Ops
Definition: GCNSubtarget.h:77
llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:244
llvm::GCNSubtarget::AddNoCarryInsts
bool AddNoCarryInsts
Definition: GCNSubtarget.h:170
llvm::GCNSubtarget::hasNegativeUnalignedScratchOffsetBug
bool hasNegativeUnalignedScratchOffsetBug() const
Definition: GCNSubtarget.h:897
llvm::GCNSubtarget::usePRTStrictNull
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:490
llvm::GCNSubtarget::hasInstFwdPrefetchBug
bool hasInstFwdPrefetchBug() const
Definition: GCNSubtarget.h:939
llvm::GCNSubtarget::FastDenormalF32
bool FastDenormalF32
Definition: GCNSubtarget.h:75
llvm::GCNSubtarget::HasRegisterBanking
bool HasRegisterBanking
Definition: GCNSubtarget.h:162
llvm::GCNSubtarget::HasPackedTID
bool HasPackedTID
Definition: GCNSubtarget.h:181
llvm::GCNSubtarget::GFX10_BEncoding
bool GFX10_BEncoding
Definition: GCNSubtarget.h:139
llvm::RegisterBankInfo
Holds all the information related to register banks.
Definition: RegisterBankInfo.h:39
llvm::InstructionSelector
Provides the logic to select generic machine instructions.
Definition: InstructionSelector.h:423
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
llvm::AMDGPU::IsaInfo::getMinNumSGPRs
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:589
llvm::GCNSubtarget::HasUnpackedD16VMem
bool HasUnpackedD16VMem
Definition: GCNSubtarget.h:171
llvm::GCNSubtarget::IsGCN
bool IsGCN
Definition: GCNSubtarget.h:107
llvm::GCNSubtarget::getSGPRAllocGranule
unsigned getSGPRAllocGranule() const
Definition: GCNSubtarget.h:992
llvm::GCNSubtarget::hasImageGather4D16Bug
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:869
llvm::GCNSubtarget::hasPackedTID
bool hasPackedTID() const
Definition: GCNSubtarget.h:962
llvm::GCNSubtarget::getTrapHandlerAbi
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:388
llvm::GCNSubtarget::HasFmaMixInsts
bool HasFmaMixInsts
Definition: GCNSubtarget.h:120
llvm::GCNSubtarget::has64BitDPP
bool has64BitDPP() const
Definition: GCNSubtarget.h:835
llvm::GCNSubtarget::GFX90AInsts
bool GFX90AInsts
Definition: GCNSubtarget.h:111
llvm::GCNSubtarget::HasIntClamp
bool HasIntClamp
Definition: GCNSubtarget.h:119
llvm::GCNSubtarget::hasNegativeScratchOffsetBug
bool hasNegativeScratchOffsetBug() const
Definition: GCNSubtarget.h:895
llvm::GCNSubtarget::hasVMEMtoScalarWriteHazard
bool hasVMEMtoScalarWriteHazard() const
Definition: GCNSubtarget.h:927
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:629
llvm::GCNSubtarget::HasShaderCyclesRegister
bool HasShaderCyclesRegister
Definition: GCNSubtarget.h:161
llvm::GCNSubtarget::hasUnalignedBufferAccess
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:498
llvm::GCNSubtarget::ScalarizeGlobal
bool ScalarizeGlobal
Definition: GCNSubtarget.h:182
llvm::GCNSubtarget::LDSMisalignedBug
bool LDSMisalignedBug
Definition: GCNSubtarget.h:175
llvm::GCNSubtarget::getTotalNumVGPRs
unsigned getTotalNumVGPRs() const
Definition: GCNSubtarget.h:1047
llvm::GCNSubtarget::GCNSubtarget
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
Definition: AMDGPUSubtarget.cpp:197
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:484
llvm::GCNSubtarget::HasVMEMtoScalarWriteHazard
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:185
llvm::GCNSubtarget::hasLDSMisalignedBug
bool hasLDSMisalignedBug() const
Definition: GCNSubtarget.h:935
llvm::GCNSubtarget::hasMFMAInlineLiteralBug
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:901
llvm::GCNSubtarget::HasImageGather4D16Bug
bool HasImageGather4D16Bug
Definition: GCNSubtarget.h:194
llvm::GCNSubtarget::FeatureDisable
bool FeatureDisable
Definition: GCNSubtarget.h:197
llvm::GCNSubtarget::EnableXNACK
bool EnableXNACK
Definition: GCNSubtarget.h:89
llvm::GCNSubtarget::hasFlat
bool hasFlat() const
Definition: GCNSubtarget.h:318
llvm::GCNSubtarget::hasImageStoreD16Bug
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:867
llvm::GCNSubtarget::hasGlobalAddTidInsts
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:573
llvm::GCNSubtarget::hasSDWAScalar
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:650
llvm::SIRegisterInfo::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: SIRegisterInfo.h:278
llvm::GCNSubtarget::hasFmaakFmamkF32Insts
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:843
llvm::GCNSubtarget::SupportsSRAMECC
bool SupportsSRAMECC
Definition: GCNSubtarget.h:151
llvm::GCNSubtarget::EnableCuMode
bool EnableCuMode
Definition: GCNSubtarget.h:92
llvm::GCNSubtarget::HasDot6Insts
bool HasDot6Insts
Definition: GCNSubtarget.h:146
llvm::GCNSubtarget::hasLDSFPAtomics
bool hasLDSFPAtomics() const
Definition: GCNSubtarget.h:812
llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1058
llvm::GCNSubtarget::hasReadM0MovRelInterpHazard
bool hasReadM0MovRelInterpHazard() const
Definition: GCNSubtarget.h:914
llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:1007
llvm::GCNSubtarget::hasUnalignedScratchAccess
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:514
llvm::GCNSubtarget::hasDot7Insts
bool hasDot7Insts() const
Definition: GCNSubtarget.h:694
llvm::GCNSubtarget::TSInfo
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:199
llvm::GCNSubtarget::hasRFEHazards
bool hasRFEHazards() const
Definition: GCNSubtarget.h:435
llvm::GCNSubtarget::HasVcmpxExecWARHazard
bool HasVcmpxExecWARHazard
Definition: GCNSubtarget.h:188
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:957
llvm::GCNSubtarget::getOccupancyWithNumVGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
Definition: AMDGPUSubtarget.cpp:688
llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:570
llvm::GCNSubtarget::hasNoDataDepHazard
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:738
llvm::GCNSubtarget::enableEarlyIfConversion
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:773
llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:577
llvm::GCNSubtarget::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1113
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:276
llvm::GCNSubtarget::MaxPrivateElementSize
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:71
llvm::GCNSubtarget::hasBFE
bool hasBFE() const
Definition: GCNSubtarget.h:332
SIInstrInfo.h
llvm::GCNSubtarget::hasFMA
bool hasFMA() const
Definition: GCNSubtarget.h:372
llvm::GCNSubtarget::hasLdsBranchVmemWARHazard
bool hasLdsBranchVmemWARHazard() const
Definition: GCNSubtarget.h:947
llvm::GCNSubtarget::HasPkFmacF16Inst
bool HasPkFmacF16Inst
Definition: GCNSubtarget.h:149
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:96
llvm::GCNSubtarget::hasIntClamp
bool hasIntClamp() const
Definition: GCNSubtarget.h:286
llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:581
llvm::GCNSubtarget::hasDenormModeInst
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:458
llvm::GCNSubtarget::hasAtomicFaddInsts
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:706
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:550
llvm::GCNSubtarget::hasDot5Insts
bool hasDot5Insts() const
Definition: GCNSubtarget.h:686
llvm::GCNSubtarget::hasScalarStores
bool hasScalarStores() const
Definition: GCNSubtarget.h:804
llvm::GCNSubtarget::hasGFX10_3Insts
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:877
llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:674
llvm::GCNSubtarget::CaymanISA
bool CaymanISA
Definition: GCNSubtarget.h:173
llvm::GCNSubtarget::HasScalarStores
bool HasScalarStores
Definition: GCNSubtarget.h:123
llvm::GCNSubtarget::flatScratchIsPointer
bool flatScratchIsPointer() const
Definition: GCNSubtarget.h:981
llvm::GCNSubtarget::supportsGetDoorbellID
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:392
llvm::GCNSubtarget::hasDPPBroadcasts
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:823
llvm::GCNSubtarget::hasMadF16
bool hasMadF16() const
Definition: AMDGPUSubtarget.cpp:652
SIFrameLowering.h
llvm::GCNSubtarget::GFX7GFX8GFX9Insts
bool GFX7GFX8GFX9Insts
Definition: GCNSubtarget.h:114
llvm::GCNSubtarget::HasSDWAOmod
bool HasSDWAOmod
Definition: GCNSubtarget.h:125
llvm::GCNSubtarget::hasDPPWavefrontShifts
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:827
llvm::GCNSubtarget::NegativeUnalignedScratchOffsetBug
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:117
llvm::GCNSubtarget::SGPRInitBug
bool SGPRInitBug
Definition: GCNSubtarget.h:115
llvm::GCNSubtarget::HasVOP3Literal
bool HasVOP3Literal
Definition: GCNSubtarget.h:163
llvm::GCNSubtarget::hasDPP8
bool hasDPP8() const
Definition: GCNSubtarget.h:831
llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:547
llvm::GCNSubtarget::hasScalarPackInsts
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:380
llvm::GCNSubtarget::hasD16LoadStore
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:593
llvm::GCNSubtarget::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: GCNSubtarget.h:1090
llvm::GCNSubtarget::adjustSchedDependency
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
Definition: AMDGPUSubtarget.cpp:810
llvm::GCNSubtarget::hasDot4Insts
bool hasDot4Insts() const
Definition: GCNSubtarget.h:682
llvm::GCNSubtarget::hasNoSdstCMPX
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:710
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1013
llvm::GCNSubtarget::enableFlatScratch
bool enableFlatScratch() const
Definition: AMDGPUSubtarget.cpp:328
llvm::GCNSubtarget::getPostRAMutations
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation >> &Mutations) const override
Definition: AMDGPUSubtarget.cpp:972
llvm::GCNSubtarget::hasFlatScratchSTMode
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:565
llvm::GCNSubtarget::hasFlatAddressSpace
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:542
llvm::GCNSubtarget::FlatForGlobal
bool FlatForGlobal
Definition: GCNSubtarget.h:80
llvm::GCNSubtarget::HasNSAEncoding
bool HasNSAEncoding
Definition: GCNSubtarget.h:138
llvm::GCNSubtarget::getReservedNumSGPRs
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
Definition: AMDGPUSubtarget.cpp:697
llvm::GCNSubtarget::GFX10Insts
bool GFX10Insts
Definition: GCNSubtarget.h:112
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::GCNSubtarget::getVGPRAllocGranule
unsigned getVGPRAllocGranule() const
Definition: GCNSubtarget.h:1037
llvm::GCNSubtarget::hasNSAtoVMEMBug
bool hasNSAtoVMEMBug() const
Definition: GCNSubtarget.h:951
llvm::GCNSubtarget::HasMAIInsts
bool HasMAIInsts
Definition: GCNSubtarget.h:148
llvm::GCNSubtarget::enableSubRegLiveness
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:762
llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
AMDGPUGenSubtargetInfo
llvm::GCNSubtarget::setScalarizeGlobalBehavior
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:766
llvm::GCNSubtarget::GFX9Insts
bool GFX9Insts
Definition: GCNSubtarget.h:110
llvm::GCNSubtarget::HasDot1Insts
bool HasDot1Insts
Definition: GCNSubtarget.h:141
llvm::GCNSubtarget::HasDPP
bool HasDPP
Definition: GCNSubtarget.h:130
llvm::GCNSubtarget::hasFullRate64Ops
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:310
llvm::GCNSubtarget::HasNoSdstCMPX
bool HasNoSdstCMPX
Definition: GCNSubtarget.h:157
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::GCNSubtarget::hasUnpackedD16VMem
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:633
llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:252
llvm::GCNSubtarget::EnableSRAMECC
bool EnableSRAMECC
Definition: GCNSubtarget.h:155
llvm::GCNSubtarget::getMinWavesPerEU
unsigned getMinWavesPerEU() const override
Definition: GCNSubtarget.h:1119
llvm::GCNSubtarget::HasDot5Insts
bool HasDot5Insts
Definition: GCNSubtarget.h:145
llvm::GCNSubtarget::HasAtomicFaddInsts
bool HasAtomicFaddInsts
Definition: GCNSubtarget.h:150
llvm::GCNSubtarget::useAA
bool useAA() const override
Definition: AMDGPUSubtarget.cpp:660
llvm::GCNSubtarget::hasDPP
bool hasDPP() const
Definition: GCNSubtarget.h:819
llvm::GCNSubtarget::HasInstFwdPrefetchBug
bool HasInstFwdPrefetchBug
Definition: GCNSubtarget.h:187
llvm::SITargetLowering
Definition: SIISelLowering.h:30
llvm::GCNSubtarget::HasSMEMtoVectorWriteHazard
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:186
llvm::GCNSubtarget::hasG16
bool hasG16() const
Definition: GCNSubtarget.h:861
llvm::GCNSubtarget::hasHardClauses
bool hasHardClauses() const
Definition: GCNSubtarget.h:955
llvm::GCNSubtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:344
llvm::GCNSubtarget::haveRoundOpsF64
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:478
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:607
llvm::SIInstrInfo::getRegisterInfo
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:171
llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:782
llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:694
llvm::GCNSubtarget::HasDPP8
bool HasDPP8
Definition: GCNSubtarget.h:131
llvm::GCNSubtarget::EnableDS128
bool EnableDS128
Definition: GCNSubtarget.h:99
llvm::GCNSubtarget::HasMFMAInlineLiteralBug
bool HasMFMAInlineLiteralBug
Definition: GCNSubtarget.h:176
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:225
llvm::GCNSubtarget::hasOnlyRevVALUShifts
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:324
llvm::GCNSubtarget::UnalignedScratchAccess
bool UnalignedScratchAccess
Definition: GCNSubtarget.h:82
llvm::GCNSubtarget::TrapID::LLVMAMDHSATrap
@ LLVMAMDHSATrap
llvm::GCNSubtarget::DumpCode
bool DumpCode
Definition: GCNSubtarget.h:101
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:267
llvm::GCNSubtarget::TrapHandlerAbi::AMDHSA
@ AMDHSA
llvm::GCNSubtarget::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs() const
Definition: GCNSubtarget.h:1052
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::GCNSubtarget::hasSwap
bool hasSwap() const
Definition: GCNSubtarget.h:376
llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:530
llvm::GCNSubtarget::FlatScratchInsts
bool FlatScratchInsts
Definition: GCNSubtarget.h:168
AMDGPUCallLowering.h
llvm::GCNSubtarget::HasMovrel
bool HasMovrel
Definition: GCNSubtarget.h:121
llvm::GCNSubtarget::HasDot7Insts
bool HasDot7Insts
Definition: GCNSubtarget.h:147
SelectionDAGTargetInfo.h
llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:538
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:332
llvm::GCNSubtarget::hasMergedShaders
bool hasMergedShaders() const
Definition: GCNSubtarget.h:987
llvm::GCNSubtarget::FP64
bool FP64
Definition: GCNSubtarget.h:104
llvm::GCNSubtarget::R600ALUInst
bool R600ALUInst
Definition: GCNSubtarget.h:172
llvm::GCNSubtarget::hasFFBL
bool hasFFBL() const
Definition: GCNSubtarget.h:348
llvm::GCNSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:257
llvm::GCNSubtarget::hasApertureRegs
bool hasApertureRegs() const
Definition: GCNSubtarget.h:522
llvm::GCNSubtarget::~GCNSubtarget
~GCNSubtarget() override
llvm::GCNSubtarget::getSGPREncodingGranule
unsigned getSGPREncodingGranule() const
Definition: GCNSubtarget.h:997
llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:537
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:597
llvm::GCNSubtarget::enableSIScheduler
bool enableSIScheduler() const
Definition: GCNSubtarget.h:883
llvm::AMDGPU::IsaInfo::getMinNumVGPRs
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:700
llvm::GCNSubtarget::getInstrItineraryData
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:261
llvm::GCNSubtarget::CIInsts
bool CIInsts
Definition: GCNSubtarget.h:108
llvm::GCNSubtarget::HasVscnt
bool HasVscnt
Definition: GCNSubtarget.h:158
llvm::GCNSubtarget::FastFMAF32
bool FastFMAF32
Definition: GCNSubtarget.h:74
llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:742
llvm::GCNSubtarget::hasReadM0SendMsgHazard
bool hasReadM0SendMsgHazard() const
Definition: GCNSubtarget.h:918
llvm::GCNSubtarget::Has64BitDPP
bool Has64BitDPP
Definition: GCNSubtarget.h:132
llvm::GCNSubtarget::hasMIMG_R128
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:294
llvm::LegalizerInfo
Definition: LegalizerInfo.h:1041
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumVGPRs
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
Definition: AMDGPUMetadata.h:255
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::GCNSubtarget::hasSMEMtoVectorWriteHazard
bool hasSMEMtoVectorWriteHazard() const
Definition: GCNSubtarget.h:931
llvm::GCNSubtarget::HasApertureRegs
bool HasApertureRegs
Definition: GCNSubtarget.h:84
llvm::GCNSubtarget::hasMin3Max3_16
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:360
llvm::GCNSubtarget::HasSMemTimeInst
bool HasSMemTimeInst
Definition: GCNSubtarget.h:160
llvm::AMDGPUSubtarget::Generation
Generation
Definition: AMDGPUSubtarget.h:31
llvm::MachineSchedPolicy
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Definition: MachineScheduler.h:174
llvm::GCNSubtarget::TargetTriple
Triple TargetTriple
Definition: GCNSubtarget.h:66
llvm::GCNSubtarget::HasSDWAMac
bool HasSDWAMac
Definition: GCNSubtarget.h:128
llvm::GCNSubtarget::hasHWFP64
bool hasHWFP64() const
Definition: GCNSubtarget.h:298
llvm::GCNSubtarget::getTotalNumSGPRs
unsigned getTotalNumSGPRs() const
Definition: GCNSubtarget.h:1002
llvm::GCNSubtarget::has12DWordStoreHazard
bool has12DWordStoreHazard() const
Definition: GCNSubtarget.h:905
llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition: AMDGPUSubtarget.h:184
llvm::GCNSubtarget::hasVcmpxPermlaneHazard
bool hasVcmpxPermlaneHazard() const
Definition: GCNSubtarget.h:923
llvm::GCNSubtarget::hasSDWAOutModsVOPC
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:662
llvm::CallLowering
Definition: CallLowering.h:43
llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:714
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:399
llvm::GCNSubtarget::hasMad64_32
bool hasMad64_32() const
Definition: GCNSubtarget.h:642
llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:659
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::GCNSubtarget::FlatInstOffsets
bool FlatInstOffsets
Definition: GCNSubtarget.h:166
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:698
llvm::GCNSubtarget::HasG16
bool HasG16
Definition: GCNSubtarget.h:137
llvm::GCNSubtarget::hasUsableDivScaleConditionOutput
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:408
llvm::GCNSubtarget::hasAtomicCSub
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:577
llvm::GCNSubtarget::HasGFX10A16
bool HasGFX10A16
Definition: GCNSubtarget.h:136
llvm::GCNSubtarget::HasR128A16
bool HasR128A16
Definition: GCNSubtarget.h:135
llvm::GCNSubtarget::getCallLowering
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:232