LLVM  10.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "AMDGPU.h"
18 #include "AMDGPUCallLowering.h"
19 #include "R600FrameLowering.h"
20 #include "R600ISelLowering.h"
21 #include "R600InstrInfo.h"
22 #include "SIFrameLowering.h"
23 #include "SIISelLowering.h"
24 #include "SIInstrInfo.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/ADT/Triple.h"
34 #include <cassert>
35 #include <cstdint>
36 #include <memory>
37 #include <utility>
38 
39 #define GET_SUBTARGETINFO_HEADER
40 #include "AMDGPUGenSubtargetInfo.inc"
41 #define GET_SUBTARGETINFO_HEADER
42 #include "R600GenSubtargetInfo.inc"
43 
44 namespace llvm {
45 
46 class StringRef;
47 
49 public:
50  enum Generation {
51  R600 = 0,
52  R700 = 1,
53  EVERGREEN = 2,
58  GFX9 = 7,
59  GFX10 = 8
60  };
61 
62 private:
63  Triple TargetTriple;
64 
65 protected:
70  bool HasSDWA;
72  bool HasMulI24;
73  bool HasMulU24;
78  unsigned MaxWavesPerEU;
80  unsigned WavefrontSize;
81 
82 public:
83  AMDGPUSubtarget(const Triple &TT);
84 
85  static const AMDGPUSubtarget &get(const MachineFunction &MF);
86  static const AMDGPUSubtarget &get(const TargetMachine &TM,
87  const Function &F);
88 
89  /// \returns Default flat work group size range for a calling convention.
90  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
91 
92  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
93  /// for function \p F, or minimum/maximum flat work group sizes explicitly
94  /// requested using "amdgpu-flat-work-group-size" attribute attached to
95  /// function \p F.
96  ///
97  /// \returns Subtarget's default values if explicitly requested values cannot
98  /// be converted to integer, or violate subtarget's specifications.
99  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
100 
101  /// \returns Subtarget's default pair of minimum/maximum number of waves per
102  /// execution unit for function \p F, or minimum/maximum number of waves per
103  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
104  /// attached to function \p F.
105  ///
106  /// \returns Subtarget's default values if explicitly requested values cannot
107  /// be converted to integer, violate subtarget's specifications, or are not
108  /// compatible with minimum/maximum number of waves limited by flat work group
109  /// size, register usage, and/or lds usage.
110  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
111 
112  /// Return the amount of LDS that can be used that will not restrict the
113  /// occupancy lower than WaveCount.
114  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
115  const Function &) const;
116 
117  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
118  /// the given LDS memory size is the only constraint.
119  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
120 
121  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
122 
123  bool isAmdHsaOS() const {
124  return TargetTriple.getOS() == Triple::AMDHSA;
125  }
126 
127  bool isAmdPalOS() const {
128  return TargetTriple.getOS() == Triple::AMDPAL;
129  }
130 
131  bool isMesa3DOS() const {
132  return TargetTriple.getOS() == Triple::Mesa3D;
133  }
134 
135  bool isMesaKernel(const Function &F) const {
136  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
137  }
138 
139  bool isAmdHsaOrMesa(const Function &F) const {
140  return isAmdHsaOS() || isMesaKernel(F);
141  }
142 
143  bool has16BitInsts() const {
144  return Has16BitInsts;
145  }
146 
147  bool hasMadMixInsts() const {
148  return HasMadMixInsts;
149  }
150 
151  bool hasFP32Denormals() const {
152  return FP32Denormals;
153  }
154 
155  bool hasFPExceptions() const {
156  return FPExceptions;
157  }
158 
159  bool hasSDWA() const {
160  return HasSDWA;
161  }
162 
163  bool hasVOP3PInsts() const {
164  return HasVOP3PInsts;
165  }
166 
167  bool hasMulI24() const {
168  return HasMulI24;
169  }
170 
171  bool hasMulU24() const {
172  return HasMulU24;
173  }
174 
175  bool hasInv2PiInlineImm() const {
176  return HasInv2PiInlineImm;
177  }
178 
179  bool hasFminFmaxLegacy() const {
180  return HasFminFmaxLegacy;
181  }
182 
183  bool hasTrigReducedRange() const {
184  return HasTrigReducedRange;
185  }
186 
187  bool isPromoteAllocaEnabled() const {
188  return EnablePromoteAlloca;
189  }
190 
191  unsigned getWavefrontSize() const {
192  return WavefrontSize;
193  }
194 
195  int getLocalMemorySize() const {
196  return LocalMemorySize;
197  }
198 
200  return isAmdHsaOS() ? Align(8) : Align(4);
201  }
202 
203  /// Returns the offset in bytes from the start of the input buffer
204  /// of the first explicit kernel argument.
205  unsigned getExplicitKernelArgOffset(const Function &F) const {
206  return isAmdHsaOrMesa(F) ? 0 : 36;
207  }
208 
209  /// \returns Maximum number of work groups per compute unit supported by the
210  /// subtarget and limited by given \p FlatWorkGroupSize.
211  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
212 
213  /// \returns Minimum flat work group size supported by the subtarget.
214  virtual unsigned getMinFlatWorkGroupSize() const = 0;
215 
216  /// \returns Maximum flat work group size supported by the subtarget.
217  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
218 
219  /// \returns Maximum number of waves per execution unit supported by the
220  /// subtarget and limited by given \p FlatWorkGroupSize.
221  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
222 
223  /// \returns Minimum number of waves per execution unit supported by the
224  /// subtarget.
225  virtual unsigned getMinWavesPerEU() const = 0;
226 
227  /// \returns Maximum number of waves per execution unit supported by the
228  /// subtarget without any kind of limitation.
229  unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
230 
231  /// Creates value range metadata on a workitemid.* intrinsic call or load.
232  bool makeLIDRangeMetadata(Instruction *I) const;
233 
234  /// \returns Number of bytes of arguments that are passed to a shader or
235  /// kernel in addition to the explicit ones declared for the function.
236  unsigned getImplicitArgNumBytes(const Function &F) const {
237  if (isMesaKernel(F))
238  return 16;
239  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
240  }
241  uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
242  unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
243 
244  virtual ~AMDGPUSubtarget() {}
245 };
246 
248  public AMDGPUSubtarget {
249 
251 
252 public:
254  TrapHandlerAbiNone = 0,
255  TrapHandlerAbiHsa = 1
256  };
257 
258  enum TrapID {
259  TrapIDHardwareReserved = 0,
260  TrapIDHSADebugTrap = 1,
261  TrapIDLLVMTrap = 2,
262  TrapIDLLVMDebugTrap = 3,
263  TrapIDDebugBreakpoint = 7,
264  TrapIDDebugReserved8 = 8,
265  TrapIDDebugReservedFE = 0xfe,
266  TrapIDDebugReservedFF = 0xff
267  };
268 
270  LLVMTrapHandlerRegValue = 1
271  };
272 
273 private:
274  /// GlobalISel related APIs.
275  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
276  std::unique_ptr<InstructionSelector> InstSelector;
277  std::unique_ptr<LegalizerInfo> Legalizer;
278  std::unique_ptr<RegisterBankInfo> RegBankInfo;
279 
280 protected:
281  // Basic subtarget description.
283  unsigned Gen;
287 
288  // Possibly statically set by tablegen, but may want to be overridden.
291 
292  // Dynamically set bits that enable features.
304 
305  // Used as options.
311  bool DumpCode;
312 
313  // Subtarget properties statically set by tablegen
314  bool FP64;
315  bool FMA;
316  bool MIMG_R128;
317  bool IsGCN;
319  bool CIInsts;
320  bool GFX8Insts;
321  bool GFX9Insts;
328  bool HasMovrel;
337  bool HasDPP;
338  bool HasDPP8;
354  bool HasVscnt;
366  bool CaymanISA;
367  bool CFALUBug;
373 
383 
384  // Dummy feature to use for assembler in tablegen.
386 
388 private:
389  SIInstrInfo InstrInfo;
390  SITargetLowering TLInfo;
391  SIFrameLowering FrameLowering;
392 
393  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
394  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
395 
396 public:
397  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
398  const GCNTargetMachine &TM);
399  ~GCNSubtarget() override;
400 
401  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
402  StringRef GPU, StringRef FS);
403 
404  const SIInstrInfo *getInstrInfo() const override {
405  return &InstrInfo;
406  }
407 
408  const SIFrameLowering *getFrameLowering() const override {
409  return &FrameLowering;
410  }
411 
412  const SITargetLowering *getTargetLowering() const override {
413  return &TLInfo;
414  }
415 
416  const SIRegisterInfo *getRegisterInfo() const override {
417  return &InstrInfo.getRegisterInfo();
418  }
419 
420  const CallLowering *getCallLowering() const override {
421  return CallLoweringInfo.get();
422  }
423 
425  return InstSelector.get();
426  }
427 
428  const LegalizerInfo *getLegalizerInfo() const override {
429  return Legalizer.get();
430  }
431 
432  const RegisterBankInfo *getRegBankInfo() const override {
433  return RegBankInfo.get();
434  }
435 
436  // Nothing implemented, just prevent crashes on use.
437  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
438  return &TSInfo;
439  }
440 
441  const InstrItineraryData *getInstrItineraryData() const override {
442  return &InstrItins;
443  }
444 
445  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
446 
448  return (Generation)Gen;
449  }
450 
451  unsigned getWavefrontSizeLog2() const {
452  return Log2_32(WavefrontSize);
453  }
454 
455  /// Return the number of high bits known to be zero for a frame index.
457  return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
458  }
459 
460  int getLDSBankCount() const {
461  return LDSBankCount;
462  }
463 
464  unsigned getMaxPrivateElementSize() const {
465  return MaxPrivateElementSize;
466  }
467 
468  unsigned getConstantBusLimit(unsigned Opcode) const;
469 
470  bool hasIntClamp() const {
471  return HasIntClamp;
472  }
473 
474  bool hasFP64() const {
475  return FP64;
476  }
477 
478  bool hasMIMG_R128() const {
479  return MIMG_R128;
480  }
481 
482  bool hasHWFP64() const {
483  return FP64;
484  }
485 
486  bool hasFastFMAF32() const {
487  return FastFMAF32;
488  }
489 
490  bool hasHalfRate64Ops() const {
491  return HalfRate64Ops;
492  }
493 
494  bool hasAddr64() const {
495  return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
496  }
497 
498  // Return true if the target only has the reverse operand versions of VALU
499  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
500  bool hasOnlyRevVALUShifts() const {
501  return getGeneration() >= VOLCANIC_ISLANDS;
502  }
503 
504  bool hasBFE() const {
505  return true;
506  }
507 
508  bool hasBFI() const {
509  return true;
510  }
511 
512  bool hasBFM() const {
513  return hasBFE();
514  }
515 
516  bool hasBCNT(unsigned Size) const {
517  return true;
518  }
519 
520  bool hasFFBL() const {
521  return true;
522  }
523 
524  bool hasFFBH() const {
525  return true;
526  }
527 
528  bool hasMed3_16() const {
529  return getGeneration() >= AMDGPUSubtarget::GFX9;
530  }
531 
532  bool hasMin3Max3_16() const {
533  return getGeneration() >= AMDGPUSubtarget::GFX9;
534  }
535 
536  bool hasFmaMixInsts() const {
537  return HasFmaMixInsts;
538  }
539 
540  bool hasCARRY() const {
541  return true;
542  }
543 
544  bool hasFMA() const {
545  return FMA;
546  }
547 
548  bool hasSwap() const {
549  return GFX9Insts;
550  }
551 
552  bool hasScalarPackInsts() const {
553  return GFX9Insts;
554  }
555 
556  bool hasScalarMulHiInsts() const {
557  return GFX9Insts;
558  }
559 
561  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
562  }
563 
564  /// True if the offset field of DS instructions works as expected. On SI, the
565  /// offset uses a 16-bit adder and does not always wrap properly.
566  bool hasUsableDSOffset() const {
567  return getGeneration() >= SEA_ISLANDS;
568  }
569 
571  return EnableUnsafeDSOffsetFolding;
572  }
573 
574  /// Condition output from div_scale is usable.
576  return getGeneration() != SOUTHERN_ISLANDS;
577  }
578 
579  /// Extra wait hazard is needed in some cases before
580  /// s_cbranch_vccnz/s_cbranch_vccz.
581  bool hasReadVCCZBug() const {
582  return getGeneration() <= SEA_ISLANDS;
583  }
584 
585  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
586  /// was written by a VALU instruction.
588  return getGeneration() == SOUTHERN_ISLANDS;
589  }
590 
591  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
592  /// SGPR was written by a VALU Instruction.
594  return getGeneration() >= VOLCANIC_ISLANDS;
595  }
596 
597  bool hasRFEHazards() const {
598  return getGeneration() >= VOLCANIC_ISLANDS;
599  }
600 
601  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
602  unsigned getSetRegWaitStates() const {
603  return getGeneration() <= SEA_ISLANDS ? 1 : 2;
604  }
605 
606  bool dumpCode() const {
607  return DumpCode;
608  }
609 
610  /// Return the amount of LDS that can be used that will not restrict the
611  /// occupancy lower than WaveCount.
612  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
613  const Function &) const;
614 
615  bool hasFP16Denormals() const {
616  return FP64FP16Denormals;
617  }
618 
619  bool hasFP64Denormals() const {
620  return FP64FP16Denormals;
621  }
622 
624  return getGeneration() >= AMDGPUSubtarget::GFX9;
625  }
626 
627  /// \returns If target supports S_DENORM_MODE.
628  bool hasDenormModeInst() const {
629  return getGeneration() >= AMDGPUSubtarget::GFX10;
630  }
631 
632  bool useFlatForGlobal() const {
633  return FlatForGlobal;
634  }
635 
636  /// \returns If target supports ds_read/write_b128 and user enables generation
637  /// of ds_read/write_b128.
638  bool useDS128() const {
639  return CIInsts && EnableDS128;
640  }
641 
642  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
643  bool haveRoundOpsF64() const {
644  return CIInsts;
645  }
646 
647  /// \returns If MUBUF instructions always perform range checking, even for
648  /// buffer resources used for private memory access.
650  return getGeneration() < AMDGPUSubtarget::GFX9;
651  }
652 
653  /// \returns If target requires PRT Strict NULL support (zero result registers
654  /// for sparse texture support).
655  bool usePRTStrictNull() const {
656  return EnablePRTStrictNull;
657  }
658 
660  return AutoWaitcntBeforeBarrier;
661  }
662 
663  bool hasCodeObjectV3() const {
664  // FIXME: Need to add code object v3 support for mesa and pal.
665  return isAmdHsaOS() ? CodeObjectV3 : false;
666  }
667 
669  return UnalignedBufferAccess;
670  }
671 
673  return UnalignedScratchAccess;
674  }
675 
676  bool hasApertureRegs() const {
677  return HasApertureRegs;
678  }
679 
680  bool isTrapHandlerEnabled() const {
681  return TrapHandler;
682  }
683 
684  bool isXNACKEnabled() const {
685  return EnableXNACK;
686  }
687 
688  bool isCuModeEnabled() const {
689  return EnableCuMode;
690  }
691 
692  bool hasFlatAddressSpace() const {
693  return FlatAddressSpace;
694  }
695 
696  bool hasFlatScrRegister() const {
697  return hasFlatAddressSpace();
698  }
699 
700  bool hasFlatInstOffsets() const {
701  return FlatInstOffsets;
702  }
703 
704  bool hasFlatGlobalInsts() const {
705  return FlatGlobalInsts;
706  }
707 
708  bool hasFlatScratchInsts() const {
709  return FlatScratchInsts;
710  }
711 
713  return ScalarFlatScratchInsts;
714  }
715 
716  bool hasFlatSegmentOffsetBug() const {
717  return HasFlatSegmentOffsetBug;
718  }
719 
721  return getGeneration() > GFX9;
722  }
723 
724  bool hasD16LoadStore() const {
725  return getGeneration() >= GFX9;
726  }
727 
728  bool d16PreservesUnusedBits() const {
729  return hasD16LoadStore() && !isSRAMECCEnabled();
730  }
731 
732  bool hasD16Images() const {
733  return getGeneration() >= VOLCANIC_ISLANDS;
734  }
735 
736  /// Return true if most LDS instructions have an m0 use that requires m0 to be
737  /// initialized.
738  bool ldsRequiresM0Init() const {
739  return getGeneration() < GFX9;
740  }
741 
742  // True if the hardware rewinds and replays GWS operations if a wave is
743  // preempted.
744  //
745  // If this is false, a GWS operation requires testing if a nack set the
746  // MEM_VIOL bit, and repeating if so.
747  bool hasGWSAutoReplay() const {
748  return getGeneration() >= GFX9;
749  }
750 
751  /// \returns if target has ds_gws_sema_release_all instruction.
752  bool hasGWSSemaReleaseAll() const {
753  return CIInsts;
754  }
755 
756  bool hasAddNoCarry() const {
757  return AddNoCarryInsts;
758  }
759 
760  bool hasUnpackedD16VMem() const {
761  return HasUnpackedD16VMem;
762  }
763 
764  // Covers VS/PS/CS graphics shaders
765  bool isMesaGfxShader(const Function &F) const {
766  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
767  }
768 
769  bool hasMad64_32() const {
770  return getGeneration() >= SEA_ISLANDS;
771  }
772 
773  bool hasSDWAOmod() const {
774  return HasSDWAOmod;
775  }
776 
777  bool hasSDWAScalar() const {
778  return HasSDWAScalar;
779  }
780 
781  bool hasSDWASdst() const {
782  return HasSDWASdst;
783  }
784 
785  bool hasSDWAMac() const {
786  return HasSDWAMac;
787  }
788 
789  bool hasSDWAOutModsVOPC() const {
790  return HasSDWAOutModsVOPC;
791  }
792 
793  bool hasDLInsts() const {
794  return HasDLInsts;
795  }
796 
797  bool hasDot1Insts() const {
798  return HasDot1Insts;
799  }
800 
801  bool hasDot2Insts() const {
802  return HasDot2Insts;
803  }
804 
805  bool hasDot3Insts() const {
806  return HasDot3Insts;
807  }
808 
809  bool hasDot4Insts() const {
810  return HasDot4Insts;
811  }
812 
813  bool hasDot5Insts() const {
814  return HasDot5Insts;
815  }
816 
817  bool hasDot6Insts() const {
818  return HasDot6Insts;
819  }
820 
821  bool hasMAIInsts() const {
822  return HasMAIInsts;
823  }
824 
825  bool hasPkFmacF16Inst() const {
826  return HasPkFmacF16Inst;
827  }
828 
829  bool hasAtomicFaddInsts() const {
830  return HasAtomicFaddInsts;
831  }
832 
833  bool isSRAMECCEnabled() const {
834  return EnableSRAMECC;
835  }
836 
837  bool hasNoSdstCMPX() const {
838  return HasNoSdstCMPX;
839  }
840 
841  bool hasVscnt() const {
842  return HasVscnt;
843  }
844 
845  bool hasRegisterBanking() const {
846  return HasRegisterBanking;
847  }
848 
849  bool hasVOP3Literal() const {
850  return HasVOP3Literal;
851  }
852 
853  bool hasNoDataDepHazard() const {
854  return HasNoDataDepHazard;
855  }
856 
858  return getGeneration() < SEA_ISLANDS;
859  }
860 
861  // Scratch is allocated in 256 dword per wave blocks for the entire
862  // wavefront. When viewed from the perspective of an arbitrary workitem, this
863  // is 4-byte aligned.
864  //
865  // Only 4-byte alignment is really needed to access anything. Transformations
866  // on the pointer value itself may rely on the alignment / known low bits of
867  // the pointer. Set this to something above the minimum to avoid needing
868  // dynamic realignment in common cases.
869  Align getStackAlignment() const { return Align(16); }
870 
871  bool enableMachineScheduler() const override {
872  return true;
873  }
874 
875  bool enableSubRegLiveness() const override {
876  return true;
877  }
878 
879  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
881 
882  /// \returns Number of execution units per compute unit supported by the
883  /// subtarget.
884  unsigned getEUsPerCU() const {
885  return AMDGPU::IsaInfo::getEUsPerCU(this);
886  }
887 
888  /// \returns Maximum number of waves per compute unit supported by the
889  /// subtarget without any kind of limitation.
890  unsigned getMaxWavesPerCU() const {
892  }
893 
894  /// \returns Maximum number of waves per compute unit supported by the
895  /// subtarget and limited by given \p FlatWorkGroupSize.
896  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
897  return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
898  }
899 
900  /// \returns Number of waves per work group supported by the subtarget and
901  /// limited by given \p FlatWorkGroupSize.
902  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
903  return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
904  }
905 
906  // static wrappers
907  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
908 
909  // XXX - Why is this here if it isn't in the default pass set?
910  bool enableEarlyIfConversion() const override {
911  return true;
912  }
913 
914  void overrideSchedPolicy(MachineSchedPolicy &Policy,
915  unsigned NumRegionInstrs) const override;
916 
917  unsigned getMaxNumUserSGPRs() const {
918  return 16;
919  }
920 
921  bool hasSMemRealTime() const {
922  return HasSMemRealTime;
923  }
924 
925  bool hasMovrel() const {
926  return HasMovrel;
927  }
928 
929  bool hasVGPRIndexMode() const {
930  return HasVGPRIndexMode;
931  }
932 
933  bool useVGPRIndexMode(bool UserEnable) const {
934  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
935  }
936 
937  bool hasScalarCompareEq64() const {
938  return getGeneration() >= VOLCANIC_ISLANDS;
939  }
940 
941  bool hasScalarStores() const {
942  return HasScalarStores;
943  }
944 
945  bool hasScalarAtomics() const {
946  return HasScalarAtomics;
947  }
948 
949  bool hasLDSFPAtomics() const {
950  return GFX8Insts;
951  }
952 
953  bool hasDPP() const {
954  return HasDPP;
955  }
956 
957  bool hasDPPBroadcasts() const {
958  return HasDPP && getGeneration() < GFX10;
959  }
960 
961  bool hasDPPWavefrontShifts() const {
962  return HasDPP && getGeneration() < GFX10;
963  }
964 
965  bool hasDPP8() const {
966  return HasDPP8;
967  }
968 
969  bool hasR128A16() const {
970  return HasR128A16;
971  }
972 
973  bool hasOffset3fBug() const {
974  return HasOffset3fBug;
975  }
976 
977  bool hasNSAEncoding() const {
978  return HasNSAEncoding;
979  }
980 
981  bool hasMadF16() const;
982 
983  bool enableSIScheduler() const {
984  return EnableSIScheduler;
985  }
986 
987  bool loadStoreOptEnabled() const {
988  return EnableLoadStoreOpt;
989  }
990 
991  bool hasSGPRInitBug() const {
992  return SGPRInitBug;
993  }
994 
995  bool hasMFMAInlineLiteralBug() const {
996  return HasMFMAInlineLiteralBug;
997  }
998 
999  bool has12DWordStoreHazard() const {
1000  return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
1001  }
1002 
1003  // \returns true if the subtarget supports DWORDX3 load/store instructions.
1004  bool hasDwordx3LoadStores() const {
1005  return CIInsts;
1006  }
1007 
1008  bool hasSMovFedHazard() const {
1009  return getGeneration() == AMDGPUSubtarget::GFX9;
1010  }
1011 
1013  return getGeneration() == AMDGPUSubtarget::GFX9;
1014  }
1015 
1016  bool hasReadM0SendMsgHazard() const {
1017  return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
1018  getGeneration() <= AMDGPUSubtarget::GFX9;
1019  }
1020 
1021  bool hasVcmpxPermlaneHazard() const {
1022  return HasVcmpxPermlaneHazard;
1023  }
1024 
1026  return HasVMEMtoScalarWriteHazard;
1027  }
1028 
1030  return HasSMEMtoVectorWriteHazard;
1031  }
1032 
1033  bool hasLDSMisalignedBug() const {
1034  return LDSMisalignedBug && !EnableCuMode;
1035  }
1036 
1037  bool hasInstFwdPrefetchBug() const {
1038  return HasInstFwdPrefetchBug;
1039  }
1040 
1041  bool hasVcmpxExecWARHazard() const {
1042  return HasVcmpxExecWARHazard;
1043  }
1044 
1046  return HasLdsBranchVmemWARHazard;
1047  }
1048 
1049  bool hasNSAtoVMEMBug() const {
1050  return HasNSAtoVMEMBug;
1051  }
1052 
1053  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1054  /// SGPRs
1055  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1056 
1057  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1058  /// VGPRs
1059  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1060 
1061  /// Return occupancy for the given function. Uses the LDS size and the
1062  /// numbers of registers if provided.
1063  /// Note, occupancy can be affected by the scratch allocation as well, but
1064  /// we do not have enough information to compute it.
1065  unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0,
1066  unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1067 
1068  /// \returns true if the flat_scratch register should be initialized with the
1069  /// pointer to the wave's scratch memory rather than a size and offset.
1070  bool flatScratchIsPointer() const {
1071  return getGeneration() >= AMDGPUSubtarget::GFX9;
1072  }
1073 
1074  /// \returns true if the machine has merged shaders in which s0-s7 are
1075  /// reserved by the hardware and user SGPRs start at s8
1076  bool hasMergedShaders() const {
1077  return getGeneration() >= GFX9;
1078  }
1079 
1080  /// \returns SGPR allocation granularity supported by the subtarget.
1081  unsigned getSGPRAllocGranule() const {
1083  }
1084 
1085  /// \returns SGPR encoding granularity supported by the subtarget.
1086  unsigned getSGPREncodingGranule() const {
1088  }
1089 
1090  /// \returns Total number of SGPRs supported by the subtarget.
1091  unsigned getTotalNumSGPRs() const {
1092  return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1093  }
1094 
1095  /// \returns Addressable number of SGPRs supported by the subtarget.
1096  unsigned getAddressableNumSGPRs() const {
1098  }
1099 
1100  /// \returns Minimum number of SGPRs that meets the given number of waves per
1101  /// execution unit requirement supported by the subtarget.
1102  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1103  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1104  }
1105 
1106  /// \returns Maximum number of SGPRs that meets the given number of waves per
1107  /// execution unit requirement supported by the subtarget.
1108  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1109  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1110  }
1111 
1112  /// \returns Reserved number of SGPRs for given function \p MF.
1113  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1114 
1115  /// \returns Maximum number of SGPRs that meets number of waves per execution
1116  /// unit requirement for function \p MF, or number of SGPRs explicitly
1117  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1118  ///
1119  /// \returns Value that meets number of waves per execution unit requirement
1120  /// if explicitly requested value cannot be converted to integer, violates
1121  /// subtarget's specifications, or does not meet number of waves per execution
1122  /// unit requirement.
1123  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1124 
1125  /// \returns VGPR allocation granularity supported by the subtarget.
1126  unsigned getVGPRAllocGranule() const {
1128  }
1129 
1130  /// \returns VGPR encoding granularity supported by the subtarget.
1131  unsigned getVGPREncodingGranule() const {
1133  }
1134 
1135  /// \returns Total number of VGPRs supported by the subtarget.
1136  unsigned getTotalNumVGPRs() const {
1137  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1138  }
1139 
1140  /// \returns Addressable number of VGPRs supported by the subtarget.
1141  unsigned getAddressableNumVGPRs() const {
1143  }
1144 
1145  /// \returns Minimum number of VGPRs that meets given number of waves per
1146  /// execution unit requirement supported by the subtarget.
1147  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1148  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1149  }
1150 
1151  /// \returns Maximum number of VGPRs that meets given number of waves per
1152  /// execution unit requirement supported by the subtarget.
1153  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1154  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1155  }
1156 
1157  /// \returns Maximum number of VGPRs that meets number of waves per execution
1158  /// unit requirement for function \p MF, or number of VGPRs explicitly
1159  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1160  ///
1161  /// \returns Value that meets number of waves per execution unit requirement
1162  /// if explicitly requested value cannot be converted to integer, violates
1163  /// subtarget's specifications, or does not meet number of waves per execution
1164  /// unit requirement.
1165  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1166 
1167  void getPostRAMutations(
1168  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1169  const override;
1170 
1171  bool isWave32() const {
1172  return WavefrontSize == 32;
1173  }
1174 
1176  return getRegisterInfo()->getBoolRC();
1177  }
1178 
1179  /// \returns Maximum number of work groups per compute unit supported by the
1180  /// subtarget and limited by given \p FlatWorkGroupSize.
1181  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1182  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1183  }
1184 
1185  /// \returns Minimum flat work group size supported by the subtarget.
1186  unsigned getMinFlatWorkGroupSize() const override {
1188  }
1189 
1190  /// \returns Maximum flat work group size supported by the subtarget.
1191  unsigned getMaxFlatWorkGroupSize() const override {
1193  }
1194 
1195  /// \returns Maximum number of waves per execution unit supported by the
1196  /// subtarget and limited by given \p FlatWorkGroupSize.
1197  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1198  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1199  }
1200 
1201  /// \returns Minimum number of waves per execution unit supported by the
1202  /// subtarget.
1203  unsigned getMinWavesPerEU() const override {
1204  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1205  }
1206 };
1207 
1209  public AMDGPUSubtarget {
1210 private:
1211  R600InstrInfo InstrInfo;
1212  R600FrameLowering FrameLowering;
1213  bool FMA;
1214  bool CaymanISA;
1215  bool CFALUBug;
1216  bool HasVertexCache;
1217  bool R600ALUInst;
1218  bool FP64;
1219  short TexVTXClauseSize;
1220  Generation Gen;
1221  R600TargetLowering TLInfo;
1222  InstrItineraryData InstrItins;
1223  SelectionDAGTargetInfo TSInfo;
1224 
1225 public:
1226  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
1227  const TargetMachine &TM);
1228 
1229  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
1230 
1231  const R600FrameLowering *getFrameLowering() const override {
1232  return &FrameLowering;
1233  }
1234 
1235  const R600TargetLowering *getTargetLowering() const override {
1236  return &TLInfo;
1237  }
1238 
1239  const R600RegisterInfo *getRegisterInfo() const override {
1240  return &InstrInfo.getRegisterInfo();
1241  }
1242 
1243  const InstrItineraryData *getInstrItineraryData() const override {
1244  return &InstrItins;
1245  }
1246 
1247  // Nothing implemented, just prevent crashes on use.
1249  return &TSInfo;
1250  }
1251 
1252  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1253 
1255  return Gen;
1256  }
1257 
1258  Align getStackAlignment() const { return Align(4); }
1259 
1260  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1261  StringRef GPU, StringRef FS);
1262 
1263  bool hasBFE() const {
1264  return (getGeneration() >= EVERGREEN);
1265  }
1266 
1267  bool hasBFI() const {
1268  return (getGeneration() >= EVERGREEN);
1269  }
1270 
1271  bool hasBCNT(unsigned Size) const {
1272  if (Size == 32)
1273  return (getGeneration() >= EVERGREEN);
1274 
1275  return false;
1276  }
1277 
1278  bool hasBORROW() const {
1279  return (getGeneration() >= EVERGREEN);
1280  }
1281 
1282  bool hasCARRY() const {
1283  return (getGeneration() >= EVERGREEN);
1284  }
1285 
1286  bool hasCaymanISA() const {
1287  return CaymanISA;
1288  }
1289 
1290  bool hasFFBL() const {
1291  return (getGeneration() >= EVERGREEN);
1292  }
1293 
1294  bool hasFFBH() const {
1295  return (getGeneration() >= EVERGREEN);
1296  }
1297 
1298  bool hasFMA() const { return FMA; }
1299 
1300  bool hasCFAluBug() const { return CFALUBug; }
1301 
1302  bool hasVertexCache() const { return HasVertexCache; }
1303 
1304  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1305 
1306  bool enableMachineScheduler() const override {
1307  return true;
1308  }
1309 
1310  bool enableSubRegLiveness() const override {
1311  return true;
1312  }
1313 
1314  /// \returns Maximum number of work groups per compute unit supported by the
1315  /// subtarget and limited by given \p FlatWorkGroupSize.
1316  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1317  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1318  }
1319 
1320  /// \returns Minimum flat work group size supported by the subtarget.
1321  unsigned getMinFlatWorkGroupSize() const override {
1323  }
1324 
1325  /// \returns Maximum flat work group size supported by the subtarget.
1326  unsigned getMaxFlatWorkGroupSize() const override {
1328  }
1329 
1330  /// \returns Maximum number of waves per execution unit supported by the
1331  /// subtarget and limited by given \p FlatWorkGroupSize.
1332  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1333  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1334  }
1335 
1336  /// \returns Minimum number of waves per execution unit supported by the
1337  /// subtarget.
1338  unsigned getMinWavesPerEU() const override {
1339  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1340  }
1341 };
1342 
1343 } // end namespace llvm
1344 
1345 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasVscnt() const
bool hasSDWAOmod() const
bool hasLDSMisalignedBug() const
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool hasVOP3Literal() const
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
bool hasPkFmacF16Inst() const
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
bool hasApertureRegs() const
bool hasScalarPackInsts() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
bool hasFlatScrRegister() const
bool isMesaKernel(const Function &F) const
unsigned getMinFlatWorkGroupSize() const override
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:305
bool isPromoteAllocaEnabled() const
bool d16PreservesUnusedBits() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getSGPRAllocGranule() const
bool hasNSAtoVMEMBug() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasAtomicFaddInsts() const
bool hasTrigReducedRange() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool hasVcmpxPermlaneHazard() const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
virtual unsigned getMinWavesPerEU() const =0
F(f)
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
InstrItineraryData InstrItins
Align getStackAlignment() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasInstFwdPrefetchBug() const
bool hasFastFMAF32() const
Generation getGeneration() const
bool hasFlatSegmentOffsetBug() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:171
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasDPPWavefrontShifts() const
bool hasFP64Denormals() const
bool hasScalarMulHiInsts() const
Holds all the information related to register banks.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
bool hasDwordx3LoadStores() const
bool hasIntClamp() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasLdsBranchVmemWARHazard() const
bool hasD16Images() const
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
bool isTrapHandlerEnabled() const
bool hasDot4Insts() const
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasSMemRealTime() const
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
const TargetRegisterClass * getBoolRC() const
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasHalfRate64Ops() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
bool useFlatForGlobal() const
Align getAlignmentForImplicitArgPtr() const
unsigned getAddressableNumSGPRs() const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinWavesPerEU() const override
Align getStackAlignment() const
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
Itinerary data supplied by a subtarget to be used by a target.
bool hasVMEMtoScalarWriteHazard() const
bool hasAddNoCarry() const
bool hasDot3Insts() const
const CallLowering * getCallLowering() const override
virtual unsigned getMinFlatWorkGroupSize() const =0
bool hasNoDataDepHazard() const
bool dumpCode() const
bool hasDot6Insts() const
bool isSRAMECCEnabled() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
bool hasDot2Insts() const
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMaxWavesPerCU() const
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasCFAluBug() const
bool hasFminFmaxLegacy() const
bool hasDLInsts() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasNSAEncoding() const
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasSwap() const
bool hasMovrel() const
unsigned MaxPrivateElementSize
bool usePRTStrictNull() const
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
bool isCuModeEnabled() const
bool hasLDSFPAtomics() const
bool hasSMEMtoVectorWriteHazard() const
const R600InstrInfo * getInstrInfo() const override
bool hasRegisterBanking() const
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:71
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
bool hasGWSAutoReplay() const
bool hasOnlyRevVALUShifts() const
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:225
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
bool hasScalarAtomics() const
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasOffset3fBug() const
bool hasVcmpxExecWARHazard() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
bool hasGWSSemaReleaseAll() const
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDenormModeInst() const
bool enableSIScheduler() const
bool hasRFEHazards() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
bool hasMadMixInsts() const
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasMFMAInlineLiteralBug() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
bool hasScalarFlatScratchInsts() const
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:585
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasMed3_16() const
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
int getLDSBankCount() const
bool hasDPPBroadcasts() const
bool hasBCNT(unsigned Size) const
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool hasDot5Insts() const
bool enableSubRegLiveness() const override
bool hasNoSdstCMPX() const
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
R600 DAG Lowering interface definition.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(const Triple &TT)
unsigned getTotalNumVGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:46
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool hasSDWAScalar() const
const InstrItineraryData * getInstrItineraryData() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return whether most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
bool hasDot1Insts() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:334
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasMAIInsts() const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool hasDPP8() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
InstructionSelector * getInstructionSelector() const override
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasR128A16() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override