LLVM  14.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMD GCN specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16 
17 #include "AMDGPUCallLowering.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIFrameLowering.h"
20 #include "SIISelLowering.h"
21 #include "SIInstrInfo.h"
23 
24 #define GET_SUBTARGETINFO_HEADER
25 #include "AMDGPUGenSubtargetInfo.inc"
26 
27 namespace llvm {
28 
29 class GCNTargetMachine;
30 
31 class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
32  public AMDGPUSubtarget {
33 
35 
36 public:
37  // Following 2 enums are documented at:
38  // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
39  enum class TrapHandlerAbi {
40  NONE = 0x00,
41  AMDHSA = 0x01,
42  };
43 
44  enum class TrapID {
45  LLVMAMDHSATrap = 0x02,
46  LLVMAMDHSADebugTrap = 0x03,
47  };
48 
49 private:
50  /// GlobalISel related APIs.
51  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
52  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
53  std::unique_ptr<InstructionSelector> InstSelector;
54  std::unique_ptr<LegalizerInfo> Legalizer;
55  std::unique_ptr<RegisterBankInfo> RegBankInfo;
56 
57 protected:
58  // Basic subtarget description.
61  unsigned Gen;
65 
66  // Possibly statically set by tablegen, but may want to be overridden.
67  bool FastFMAF32;
71 
72  // Dynamically set bits that enable features.
79 
80  // This should not be used directly. 'TargetID' tracks the dynamic settings
81  // for XNACK.
83 
87 
88  // Used as options.
94  bool DumpCode;
95 
96  // Subtarget properties statically set by tablegen
97  bool FP64;
98  bool FMA;
99  bool MIMG_R128;
100  bool CIInsts;
101  bool GFX8Insts;
102  bool GFX9Insts;
113  bool HasMovrel;
122  bool HasDPP;
123  bool HasDPP8;
129  bool HasG16;
131  unsigned NSAMaxSize;
146 
147  // This should not be used directly. 'TargetID' tracks the dynamic settings
148  // for SRAMECC.
150 
152  bool HasVscnt;
173 
186 
187  // Dummy feature to use for assembler in tablegen.
189 
191 private:
192  SIInstrInfo InstrInfo;
193  SITargetLowering TLInfo;
194  SIFrameLowering FrameLowering;
195 
196 public:
197  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
198  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
199 
200  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
201  const GCNTargetMachine &TM);
202  ~GCNSubtarget() override;
203 
205  StringRef GPU, StringRef FS);
206 
207  const SIInstrInfo *getInstrInfo() const override {
208  return &InstrInfo;
209  }
210 
211  const SIFrameLowering *getFrameLowering() const override {
212  return &FrameLowering;
213  }
214 
215  const SITargetLowering *getTargetLowering() const override {
216  return &TLInfo;
217  }
218 
219  const SIRegisterInfo *getRegisterInfo() const override {
220  return &InstrInfo.getRegisterInfo();
221  }
222 
223  const CallLowering *getCallLowering() const override {
224  return CallLoweringInfo.get();
225  }
226 
227  const InlineAsmLowering *getInlineAsmLowering() const override {
228  return InlineAsmLoweringInfo.get();
229  }
230 
232  return InstSelector.get();
233  }
234 
235  const LegalizerInfo *getLegalizerInfo() const override {
236  return Legalizer.get();
237  }
238 
239  const RegisterBankInfo *getRegBankInfo() const override {
240  return RegBankInfo.get();
241  }
242 
244  return TargetID;
245  }
246 
247  // Nothing implemented, just prevent crashes on use.
248  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
249  return &TSInfo;
250  }
251 
252  const InstrItineraryData *getInstrItineraryData() const override {
253  return &InstrItins;
254  }
255 
257 
259  return (Generation)Gen;
260  }
261 
262  /// Return the number of high bits known to be zero for a frame index.
265  }
266 
267  int getLDSBankCount() const {
268  return LDSBankCount;
269  }
270 
271  unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
272  return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
273  }
274 
275  unsigned getConstantBusLimit(unsigned Opcode) const;
276 
277  /// \returns true if an instruction with opcode \p Opcode that produces a
278  /// 16-bit result in a 32-bit register implicitly zeroes the high 16 bits,
279  /// rather than preserving their original value.
280  bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
281 
282  bool hasIntClamp() const {
283  return HasIntClamp;
284  }
285 
286  bool hasFP64() const {
287  return FP64;
288  }
289 
290  bool hasMIMG_R128() const {
291  return MIMG_R128;
292  }
293 
294  bool hasHWFP64() const {
295  return FP64;
296  }
297 
298  bool hasFastFMAF32() const {
299  return FastFMAF32;
300  }
301 
302  bool hasHalfRate64Ops() const {
303  return HalfRate64Ops;
304  }
305 
306  bool hasFullRate64Ops() const {
307  return FullRate64Ops;
308  }
309 
310  bool hasAddr64() const {
312  }
313 
314  bool hasFlat() const {
316  }
317 
318  // Return true if the target only has the reverse operand versions of VALU
319  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
320  bool hasOnlyRevVALUShifts() const {
321  return getGeneration() >= VOLCANIC_ISLANDS;
322  }
323 
324  bool hasFractBug() const {
325  return getGeneration() == SOUTHERN_ISLANDS;
326  }
327 
328  bool hasBFE() const {
329  return true;
330  }
331 
332  bool hasBFI() const {
333  return true;
334  }
335 
336  bool hasBFM() const {
337  return hasBFE();
338  }
339 
340  bool hasBCNT(unsigned Size) const {
341  return true;
342  }
343 
344  bool hasFFBL() const {
345  return true;
346  }
347 
348  bool hasFFBH() const {
349  return true;
350  }
351 
352  bool hasMed3_16() const {
354  }
355 
356  bool hasMin3Max3_16() const {
358  }
359 
360  bool hasFmaMixInsts() const {
361  return HasFmaMixInsts;
362  }
363 
364  bool hasCARRY() const {
365  return true;
366  }
367 
368  bool hasFMA() const {
369  return FMA;
370  }
371 
372  bool hasSwap() const {
373  return GFX9Insts;
374  }
375 
376  bool hasScalarPackInsts() const {
377  return GFX9Insts;
378  }
379 
380  bool hasScalarMulHiInsts() const {
381  return GFX9Insts;
382  }
383 
386  }
387 
388  bool supportsGetDoorbellID() const {
389  // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
390  return getGeneration() >= GFX9;
391  }
392 
393  /// True if the offset field of DS instructions works as expected. On SI, the
394  /// offset uses a 16-bit adder and does not always wrap properly.
395  bool hasUsableDSOffset() const {
396  return getGeneration() >= SEA_ISLANDS;
397  }
398 
401  }
402 
403  /// Condition output from div_scale is usable.
405  return getGeneration() != SOUTHERN_ISLANDS;
406  }
407 
408  /// Extra wait hazard is needed in some cases before
409  /// s_cbranch_vccnz/s_cbranch_vccz.
410  bool hasReadVCCZBug() const {
411  return getGeneration() <= SEA_ISLANDS;
412  }
413 
414  /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
416  return getGeneration() >= GFX10;
417  }
418 
419  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
420  /// was written by a VALU instruction.
422  return getGeneration() == SOUTHERN_ISLANDS;
423  }
424 
425  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
426  /// SGPR was written by a VALU Instruction.
428  return getGeneration() >= VOLCANIC_ISLANDS;
429  }
430 
431  bool hasRFEHazards() const {
432  return getGeneration() >= VOLCANIC_ISLANDS;
433  }
434 
435  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
436  unsigned getSetRegWaitStates() const {
437  return getGeneration() <= SEA_ISLANDS ? 1 : 2;
438  }
439 
440  bool dumpCode() const {
441  return DumpCode;
442  }
443 
444  /// Return the amount of LDS that can be used that will not restrict the
445  /// occupancy lower than WaveCount.
446  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
447  const Function &) const;
448 
451  }
452 
453  /// \returns If target supports S_DENORM_MODE.
454  bool hasDenormModeInst() const {
456  }
457 
458  bool useFlatForGlobal() const {
459  return FlatForGlobal;
460  }
461 
462  /// \returns If target supports ds_read/write_b128 and user enables generation
463  /// of ds_read/write_b128.
464  bool useDS128() const {
465  return CIInsts && EnableDS128;
466  }
467 
468  /// \return If target supports ds_read/write_b96/128.
469  bool hasDS96AndDS128() const {
470  return CIInsts;
471  }
472 
473  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
474  bool haveRoundOpsF64() const {
475  return CIInsts;
476  }
477 
478  /// \returns If MUBUF instructions always perform range checking, even for
479  /// buffer resources used for private memory access.
482  }
483 
484  /// \returns If target requires PRT Strict NULL support (zero result registers
485  /// for sparse texture support).
486  bool usePRTStrictNull() const {
487  return EnablePRTStrictNull;
488  }
489 
492  }
493 
495  return UnalignedBufferAccess;
496  }
497 
500  }
501 
502  bool hasUnalignedDSAccess() const {
503  return UnalignedDSAccess;
504  }
505 
508  }
509 
511  return UnalignedScratchAccess;
512  }
513 
514  bool hasUnalignedAccessMode() const {
515  return UnalignedAccessMode;
516  }
517 
518  bool hasApertureRegs() const {
519  return HasApertureRegs;
520  }
521 
522  bool isTrapHandlerEnabled() const {
523  return TrapHandler;
524  }
525 
526  bool isXNACKEnabled() const {
527  return TargetID.isXnackOnOrAny();
528  }
529 
530  bool isTgSplitEnabled() const {
531  return EnableTgSplit;
532  }
533 
534  bool isCuModeEnabled() const {
535  return EnableCuMode;
536  }
537 
538  bool hasFlatAddressSpace() const {
539  return FlatAddressSpace;
540  }
541 
542  bool hasFlatScrRegister() const {
543  return hasFlatAddressSpace();
544  }
545 
546  bool hasFlatInstOffsets() const {
547  return FlatInstOffsets;
548  }
549 
550  bool hasFlatGlobalInsts() const {
551  return FlatGlobalInsts;
552  }
553 
554  bool hasFlatScratchInsts() const {
555  return FlatScratchInsts;
556  }
557 
558  // Check if target supports ST addressing mode with FLAT scratch instructions.
559  // In ST addressing mode no registers (neither VGPR nor SGPR) are used; only
560  // an immediate offset is swizzled and added to the FLAT scratch base.
561  bool hasFlatScratchSTMode() const {
562  return hasFlatScratchInsts() && hasGFX10_3Insts();
563  }
564 
566  return ScalarFlatScratchInsts;
567  }
568 
569  bool hasGlobalAddTidInsts() const {
570  return GFX10_BEncoding;
571  }
572 
573  bool hasAtomicCSub() const {
574  return GFX10_BEncoding;
575  }
576 
578  return getGeneration() >= GFX9;
579  }
580 
581  bool hasFlatSegmentOffsetBug() const {
583  }
584 
586  return getGeneration() > GFX9;
587  }
588 
589  bool hasD16LoadStore() const {
590  return getGeneration() >= GFX9;
591  }
592 
593  bool d16PreservesUnusedBits() const {
595  }
596 
597  bool hasD16Images() const {
598  return getGeneration() >= VOLCANIC_ISLANDS;
599  }
600 
601  /// Return if most LDS instructions have an m0 use that requires m0 to be
602  /// initialized.
603  bool ldsRequiresM0Init() const {
604  return getGeneration() < GFX9;
605  }
606 
607  // True if the hardware rewinds and replays GWS operations if a wave is
608  // preempted.
609  //
610  // If this is false, a GWS operation requires testing if a nack set the
611  // MEM_VIOL bit, and repeating if so.
612  bool hasGWSAutoReplay() const {
613  return getGeneration() >= GFX9;
614  }
615 
616  /// \returns if target has ds_gws_sema_release_all instruction.
617  bool hasGWSSemaReleaseAll() const {
618  return CIInsts;
619  }
620 
621  /// \returns true if the target has integer add/sub instructions that do not
622  /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
623  /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
624  /// for saturation.
625  bool hasAddNoCarry() const {
626  return AddNoCarryInsts;
627  }
628 
629  bool hasUnpackedD16VMem() const {
630  return HasUnpackedD16VMem;
631  }
632 
633  // Covers VS/PS/CS graphics shaders
634  bool isMesaGfxShader(const Function &F) const {
635  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
636  }
637 
638  bool hasMad64_32() const {
639  return getGeneration() >= SEA_ISLANDS;
640  }
641 
642  bool hasSDWAOmod() const {
643  return HasSDWAOmod;
644  }
645 
646  bool hasSDWAScalar() const {
647  return HasSDWAScalar;
648  }
649 
650  bool hasSDWASdst() const {
651  return HasSDWASdst;
652  }
653 
654  bool hasSDWAMac() const {
655  return HasSDWAMac;
656  }
657 
658  bool hasSDWAOutModsVOPC() const {
659  return HasSDWAOutModsVOPC;
660  }
661 
662  bool hasDLInsts() const {
663  return HasDLInsts;
664  }
665 
666  bool hasDot1Insts() const {
667  return HasDot1Insts;
668  }
669 
670  bool hasDot2Insts() const {
671  return HasDot2Insts;
672  }
673 
674  bool hasDot3Insts() const {
675  return HasDot3Insts;
676  }
677 
678  bool hasDot4Insts() const {
679  return HasDot4Insts;
680  }
681 
682  bool hasDot5Insts() const {
683  return HasDot5Insts;
684  }
685 
686  bool hasDot6Insts() const {
687  return HasDot6Insts;
688  }
689 
690  bool hasDot7Insts() const {
691  return HasDot7Insts;
692  }
693 
694  bool hasMAIInsts() const {
695  return HasMAIInsts;
696  }
697 
698  bool hasPkFmacF16Inst() const {
699  return HasPkFmacF16Inst;
700  }
701 
702  bool hasAtomicFaddInsts() const {
703  return HasAtomicFaddInsts;
704  }
705 
706  bool hasNoSdstCMPX() const {
707  return HasNoSdstCMPX;
708  }
709 
710  bool hasVscnt() const {
711  return HasVscnt;
712  }
713 
714  bool hasGetWaveIdInst() const {
715  return HasGetWaveIdInst;
716  }
717 
718  bool hasSMemTimeInst() const {
719  return HasSMemTimeInst;
720  }
721 
722  bool hasShaderCyclesRegister() const {
724  }
725 
726  bool hasRegisterBanking() const {
727  return HasRegisterBanking;
728  }
729 
730  bool hasVOP3Literal() const {
731  return HasVOP3Literal;
732  }
733 
734  bool hasNoDataDepHazard() const {
735  return HasNoDataDepHazard;
736  }
737 
739  return getGeneration() < SEA_ISLANDS;
740  }
741 
742  // Scratch is allocated in 256 dword per wave blocks for the entire
743  // wavefront. When viewed from the perspective of an arbitrary workitem, this
744  // is 4-byte aligned.
745  //
746  // Only 4-byte alignment is really needed to access anything. Transformations
747  // on the pointer value itself may rely on the alignment / known low bits of
748  // the pointer. Set this to something above the minimum to avoid needing
749  // dynamic realignment in common cases.
750  Align getStackAlignment() const { return Align(16); }
751 
752  bool enableMachineScheduler() const override {
753  return true;
754  }
755 
756  bool useAA() const override;
757 
758  bool enableSubRegLiveness() const override {
759  return true;
760  }
761 
764 
765  // static wrappers
766  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
767 
768  // XXX - Why is this here if it isn't in the default pass set?
769  bool enableEarlyIfConversion() const override {
770  return true;
771  }
772 
773  bool enableFlatScratch() const;
774 
776  unsigned NumRegionInstrs) const override;
777 
778  unsigned getMaxNumUserSGPRs() const {
779  return 16;
780  }
781 
782  bool hasSMemRealTime() const {
783  return HasSMemRealTime;
784  }
785 
786  bool hasMovrel() const {
787  return HasMovrel;
788  }
789 
790  bool hasVGPRIndexMode() const {
791  return HasVGPRIndexMode;
792  }
793 
794  bool useVGPRIndexMode() const;
795 
796  bool hasScalarCompareEq64() const {
797  return getGeneration() >= VOLCANIC_ISLANDS;
798  }
799 
800  bool hasScalarStores() const {
801  return HasScalarStores;
802  }
803 
804  bool hasScalarAtomics() const {
805  return HasScalarAtomics;
806  }
807 
808  bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
809 
810  /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
811  bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
812 
813  bool hasDPP() const {
814  return HasDPP;
815  }
816 
817  bool hasDPPBroadcasts() const {
818  return HasDPP && getGeneration() < GFX10;
819  }
820 
821  bool hasDPPWavefrontShifts() const {
822  return HasDPP && getGeneration() < GFX10;
823  }
824 
825  bool hasDPP8() const {
826  return HasDPP8;
827  }
828 
829  bool has64BitDPP() const {
830  return Has64BitDPP;
831  }
832 
833  bool hasPackedFP32Ops() const {
834  return HasPackedFP32Ops;
835  }
836 
837  bool hasFmaakFmamkF32Insts() const {
838  return getGeneration() >= GFX10;
839  }
840 
841  bool hasExtendedImageInsts() const {
842  return HasExtendedImageInsts;
843  }
844 
845  bool hasR128A16() const {
846  return HasR128A16;
847  }
848 
849  bool hasGFX10A16() const {
850  return HasGFX10A16;
851  }
852 
853  bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
854 
855  bool hasG16() const { return HasG16; }
856 
857  bool hasOffset3fBug() const {
858  return HasOffset3fBug;
859  }
860 
861  bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
862 
864 
865  bool hasNSAEncoding() const { return HasNSAEncoding; }
866 
867  unsigned getNSAMaxSize() const { return NSAMaxSize; }
868 
869  bool hasGFX10_AEncoding() const {
870  return GFX10_AEncoding;
871  }
872 
873  bool hasGFX10_BEncoding() const {
874  return GFX10_BEncoding;
875  }
876 
877  bool hasGFX10_3Insts() const {
878  return GFX10_3Insts;
879  }
880 
881  bool hasMadF16() const;
882 
883  bool enableSIScheduler() const {
884  return EnableSIScheduler;
885  }
886 
887  bool loadStoreOptEnabled() const {
888  return EnableLoadStoreOpt;
889  }
890 
891  bool hasSGPRInitBug() const {
892  return SGPRInitBug;
893  }
894 
896 
899  }
900 
901  bool hasMFMAInlineLiteralBug() const {
903  }
904 
905  bool has12DWordStoreHazard() const {
907  }
908 
909  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
910  bool hasDwordx3LoadStores() const {
911  return CIInsts;
912  }
913 
916  }
917 
918  bool hasReadM0SendMsgHazard() const {
921  }
922 
923  bool hasVcmpxPermlaneHazard() const {
924  return HasVcmpxPermlaneHazard;
925  }
926 
929  }
930 
933  }
934 
935  bool hasLDSMisalignedBug() const {
936  return LDSMisalignedBug && !EnableCuMode;
937  }
938 
939  bool hasInstFwdPrefetchBug() const {
940  return HasInstFwdPrefetchBug;
941  }
942 
943  bool hasVcmpxExecWARHazard() const {
944  return HasVcmpxExecWARHazard;
945  }
946 
949  }
950 
951  bool hasNSAtoVMEMBug() const {
952  return HasNSAtoVMEMBug;
953  }
954 
955  bool hasNSAClauseBug() const { return HasNSAClauseBug; }
956 
957  bool hasHardClauses() const { return getGeneration() >= GFX10; }
958 
959  bool hasGFX90AInsts() const { return GFX90AInsts; }
960 
961  /// Return if operations acting on VGPR tuples require even alignment.
962  bool needsAlignedVGPRs() const { return GFX90AInsts; }
963 
964  bool hasPackedTID() const { return HasPackedTID; }
965 
966  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
967  /// SGPRs
968  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
969 
970  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
971  /// VGPRs
972  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
973 
974  /// Return occupancy for the given function. The used LDS size and the
975  /// numbers of registers are taken into account if provided.
976  /// Note, occupancy can be affected by the scratch allocation as well, but
977  /// we do not have enough information to compute it.
978  unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
979  unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
980 
981  /// \returns true if the flat_scratch register should be initialized with the
982  /// pointer to the wave's scratch memory rather than a size and offset.
983  bool flatScratchIsPointer() const {
985  }
986 
987  /// \returns true if the flat_scratch register is initialized by the HW.
988  /// In this case it is readonly.
990 
991  /// \returns true if the machine has merged shaders in which s0-s7 are
992  /// reserved by the hardware and user SGPRs start at s8
993  bool hasMergedShaders() const {
994  return getGeneration() >= GFX9;
995  }
996 
997  /// \returns SGPR allocation granularity supported by the subtarget.
998  unsigned getSGPRAllocGranule() const {
1000  }
1001 
1002  /// \returns SGPR encoding granularity supported by the subtarget.
1003  unsigned getSGPREncodingGranule() const {
1005  }
1006 
1007  /// \returns Total number of SGPRs supported by the subtarget.
1008  unsigned getTotalNumSGPRs() const {
1009  return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1010  }
1011 
1012  /// \returns Addressable number of SGPRs supported by the subtarget.
1013  unsigned getAddressableNumSGPRs() const {
1015  }
1016 
1017  /// \returns Minimum number of SGPRs that meets the given number of waves per
1018  /// execution unit requirement supported by the subtarget.
1019  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1020  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1021  }
1022 
1023  /// \returns Maximum number of SGPRs that meets the given number of waves per
1024  /// execution unit requirement supported by the subtarget.
1025  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1026  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1027  }
1028 
1029  /// \returns Reserved number of SGPRs. This is a common
1030  /// utility function called by the MachineFunction and
1031  /// Function variants of getReservedNumSGPRs.
1032  unsigned getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const;
1033  /// \returns Reserved number of SGPRs for given machine function \p MF.
1034  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1035 
1036  /// \returns Reserved number of SGPRs for given function \p F.
1037  unsigned getReservedNumSGPRs(const Function &F) const;
1038 
1039  /// \returns max num SGPRs. This is the common utility
1040  /// function called by MachineFunction and Function
1041  /// variants of getMaxNumSGPRs.
1042  unsigned getBaseMaxNumSGPRs(const Function &F,
1043  std::pair<unsigned, unsigned> WavesPerEU,
1044  unsigned PreloadedSGPRs,
1045  unsigned ReservedNumSGPRs) const;
1046 
1047  /// \returns Maximum number of SGPRs that meets number of waves per execution
1048  /// unit requirement for function \p MF, or number of SGPRs explicitly
1049  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1050  ///
1051  /// \returns Value that meets number of waves per execution unit requirement
1052  /// if explicitly requested value cannot be converted to integer, violates
1053  /// subtarget's specifications, or does not meet number of waves per execution
1054  /// unit requirement.
1055  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1056 
1057  /// \returns Maximum number of SGPRs that meets number of waves per execution
1058  /// unit requirement for function \p F, or number of SGPRs explicitly
1059  /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1060  ///
1061  /// \returns Value that meets number of waves per execution unit requirement
1062  /// if explicitly requested value cannot be converted to integer, violates
1063  /// subtarget's specifications, or does not meet number of waves per execution
1064  /// unit requirement.
1065  unsigned getMaxNumSGPRs(const Function &F) const;
1066 
1067  /// \returns VGPR allocation granularity supported by the subtarget.
1068  unsigned getVGPRAllocGranule() const {
1070  }
1071 
1072  /// \returns VGPR encoding granularity supported by the subtarget.
1073  unsigned getVGPREncodingGranule() const {
1075  }
1076 
1077  /// \returns Total number of VGPRs supported by the subtarget.
1078  unsigned getTotalNumVGPRs() const {
1079  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1080  }
1081 
1082  /// \returns Addressable number of VGPRs supported by the subtarget.
1083  unsigned getAddressableNumVGPRs() const {
1085  }
1086 
1087  /// \returns Minimum number of VGPRs that meets given number of waves per
1088  /// execution unit requirement supported by the subtarget.
1089  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1090  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1091  }
1092 
1093  /// \returns Maximum number of VGPRs that meets given number of waves per
1094  /// execution unit requirement supported by the subtarget.
1095  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1096  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1097  }
1098 
1099  /// \returns max num VGPRs. This is the common utility function
1100  /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1101  unsigned getBaseMaxNumVGPRs(const Function &F,
1102  std::pair<unsigned, unsigned> WavesPerEU) const;
1103  /// \returns Maximum number of VGPRs that meets number of waves per execution
1104  /// unit requirement for function \p F, or number of VGPRs explicitly
1105  /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1106  ///
1107  /// \returns Value that meets number of waves per execution unit requirement
1108  /// if explicitly requested value cannot be converted to integer, violates
1109  /// subtarget's specifications, or does not meet number of waves per execution
1110  /// unit requirement.
1111  unsigned getMaxNumVGPRs(const Function &F) const;
1112 
1113  /// \returns Maximum number of VGPRs that meets number of waves per execution
1114  /// unit requirement for function \p MF, or number of VGPRs explicitly
1115  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1116  ///
1117  /// \returns Value that meets number of waves per execution unit requirement
1118  /// if explicitly requested value cannot be converted to integer, violates
1119  /// subtarget's specifications, or does not meet number of waves per execution
1120  /// unit requirement.
1121  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1122 
1123  void getPostRAMutations(
1124  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1125  const override;
1126 
1127  std::unique_ptr<ScheduleDAGMutation>
1129 
1130  bool isWave32() const {
1131  return getWavefrontSize() == 32;
1132  }
1133 
1134  bool isWave64() const {
1135  return getWavefrontSize() == 64;
1136  }
1137 
1139  return getRegisterInfo()->getBoolRC();
1140  }
1141 
1142  /// \returns Maximum number of work groups per compute unit supported by the
1143  /// subtarget and limited by given \p FlatWorkGroupSize.
1144  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1145  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1146  }
1147 
1148  /// \returns Minimum flat work group size supported by the subtarget.
1149  unsigned getMinFlatWorkGroupSize() const override {
1151  }
1152 
1153  /// \returns Maximum flat work group size supported by the subtarget.
1154  unsigned getMaxFlatWorkGroupSize() const override {
1156  }
1157 
1158  /// \returns Number of waves per execution unit required to support the given
1159  /// \p FlatWorkGroupSize.
1160  unsigned
1161  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1162  return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1163  }
1164 
1165  /// \returns Minimum number of waves per execution unit supported by the
1166  /// subtarget.
1167  unsigned getMinWavesPerEU() const override {
1168  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1169  }
1170 
1171  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1172  SDep &Dep) const override;
1173 };
1174 
1175 } // end namespace llvm
1176 
1177 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::GCNSubtarget::hasScalarMulHiInsts
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:380
llvm::GCNSubtarget::HasDot3Insts
bool HasDot3Insts
Definition: GCNSubtarget.h:137
llvm::GCNSubtarget::Gen
unsigned Gen
Definition: GCNSubtarget.h:61
llvm::GCNSubtarget::hasGFX10A16
bool hasGFX10A16() const
Definition: GCNSubtarget.h:849
llvm::GCNSubtarget::hasBFM
bool hasBFM() const
Definition: GCNSubtarget.h:336
llvm::GCNSubtarget::hasDot2Insts
bool hasDot2Insts() const
Definition: GCNSubtarget.h:670
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::AMDGPU::IsaInfo::getSGPRAllocGranule
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:592
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::GCNSubtarget::GFX8Insts
bool GFX8Insts
Definition: GCNSubtarget.h:101
llvm::GCNSubtarget::hasGWSAutoReplay
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:612
llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:585
llvm::GCNSubtarget::HasSDWAScalar
bool HasSDWAScalar
Definition: GCNSubtarget.h:118
llvm::GCNSubtarget::TrapHandlerAbi
TrapHandlerAbi
Definition: GCNSubtarget.h:39
llvm::GCNSubtarget::HasGetWaveIdInst
bool HasGetWaveIdInst
Definition: GCNSubtarget.h:153
llvm::GCNSubtarget::getRegBankInfo
const RegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:239
llvm::GCNSubtarget::hasRegisterBanking
bool hasRegisterBanking() const
Definition: GCNSubtarget.h:726
llvm::GCNSubtarget::hasSDWAMac
bool hasSDWAMac() const
Definition: GCNSubtarget.h:654
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumSGPRs
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
Definition: AMDGPUMetadata.h:253
llvm::GCNSubtarget::hasVGPRIndexMode
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:790
llvm::InlineAsmLowering
Definition: InlineAsmLowering.h:28
llvm::GCNSubtarget::hasSDWASdst
bool hasSDWASdst() const
Definition: GCNSubtarget.h:650
llvm::GCNSubtarget::getFrameLowering
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:211
llvm::GCNSubtarget::initializeSubtargetDependencies
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Definition: AMDGPUSubtarget.cpp:65
llvm::GCNSubtarget::hasD16Images
bool hasD16Images() const
Definition: GCNSubtarget.h:597
llvm::GCNSubtarget::EnablePRTStrictNull
bool EnablePRTStrictNull
Definition: GCNSubtarget.h:93
llvm::Function
Definition: Function.h:62
llvm::GCNSubtarget::HasDot2Insts
bool HasDot2Insts
Definition: GCNSubtarget.h:136
llvm::GCNSubtarget::HasPackedFP32Ops
bool HasPackedFP32Ops
Definition: GCNSubtarget.h:125
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isSramEccOnOrAny
bool isSramEccOnOrAny() const
Definition: AMDGPUBaseInfo.h:130
llvm::GCNSubtarget::FlatGlobalInsts
bool FlatGlobalInsts
Definition: GCNSubtarget.h:161
llvm::GCNSubtarget::FlatAddressSpace
bool FlatAddressSpace
Definition: GCNSubtarget.h:159
llvm::GCNSubtarget::HasDLInsts
bool HasDLInsts
Definition: GCNSubtarget.h:134
llvm::GCNSubtarget::hasNSAClauseBug
bool hasNSAClauseBug() const
Definition: GCNSubtarget.h:955
llvm::GCNSubtarget::getNSAMaxSize
unsigned getNSAMaxSize() const
Definition: GCNSubtarget.h:867
llvm::GCNSubtarget::hasMovrel
bool hasMovrel() const
Definition: GCNSubtarget.h:786
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:37
llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:427
llvm::GCNSubtarget::TrapHandlerAbi::NONE
@ NONE
llvm::GCNSubtarget::hasPermLaneX16
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:811
llvm::GCNSubtarget::hasShaderCyclesRegister
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:722
llvm::GCNSubtarget::needsAlignedVGPRs
bool needsAlignedVGPRs() const
Return true if operations acting on VGPR tuples require even alignment.
Definition: GCNSubtarget.h:962
llvm::GCNSubtarget::hasFlatScratchInsts
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:554
llvm::GCNSubtarget::UnalignedDSAccess
bool UnalignedDSAccess
Definition: GCNSubtarget.h:170
llvm::GCNSubtarget::hasFP64
bool hasFP64() const
Definition: GCNSubtarget.h:286
llvm::GCNSubtarget::InstrItins
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:62
llvm::GCNSubtarget::HasImageStoreD16Bug
bool HasImageStoreD16Bug
Definition: GCNSubtarget.h:184
llvm::GCNSubtarget::hasAutoWaitcntBeforeBarrier
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:490
llvm::GCNSubtarget::supportsMinMaxDenormModes
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:449
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:581
llvm::GCNSubtarget::HasDot4Insts
bool HasDot4Insts
Definition: GCNSubtarget.h:138
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::GCNSubtarget::hasDS96AndDS128
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:469
llvm::GCNSubtarget::HasVcmpxPermlaneHazard
bool HasVcmpxPermlaneHazard
Definition: GCNSubtarget.h:174
llvm::GCNSubtarget::getSetRegWaitStates
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:436
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::GCNSubtarget::HasExtendedImageInsts
bool HasExtendedImageInsts
Definition: GCNSubtarget.h:126
llvm::GCNSubtarget::hasFlatGlobalInsts
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:550
llvm::GCNSubtarget::hasCARRY
bool hasCARRY() const
Definition: GCNSubtarget.h:364
llvm::GCNSubtarget::useDS128
bool useDS128() const
Definition: GCNSubtarget.h:464
llvm::GCNSubtarget::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1144
llvm::GCNSubtarget::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1154
llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:522
llvm::GCNSubtarget::hasDot3Insts
bool hasDot3Insts() const
Definition: GCNSubtarget.h:674
llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:722
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:662
llvm::GCNSubtarget::hasFractBug
bool hasFractBug() const
Definition: GCNSubtarget.h:324
llvm::GCNSubtarget::hasDwordx3LoadStores
bool hasDwordx3LoadStores() const
Definition: GCNSubtarget.h:910
llvm::GCNSubtarget::hasNSAEncoding
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:865
llvm::GCNSubtarget::TrapID::LLVMAMDHSADebugTrap
@ LLVMAMDHSADebugTrap
llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:239
llvm::GCNSubtarget::HasSDWAOutModsVOPC
bool HasSDWAOutModsVOPC
Definition: GCNSubtarget.h:121
llvm::AMDGPU::IsaInfo::getMinWavesPerEU
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:559
llvm::GCNSubtarget::getStackAlignment
Align getStackAlignment() const
Definition: GCNSubtarget.h:750
llvm::GCNSubtarget::hasUnalignedDSAccessEnabled
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:506
llvm::GCNSubtarget::EnableSIScheduler
bool EnableSIScheduler
Definition: GCNSubtarget.h:91
llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:415
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::GCNSubtarget::getBaseReservedNumSGPRs
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const
Definition: AMDGPUSubtarget.cpp:768
llvm::GCNSubtarget::HasFlatSegmentOffsetBug
bool HasFlatSegmentOffsetBug
Definition: GCNSubtarget.h:183
llvm::GCNSubtarget::loadStoreOptEnabled
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:887
llvm::GCNSubtarget::TargetID
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:60
llvm::GCNSubtarget::UnalignedBufferAccess
bool UnalignedBufferAccess
Definition: GCNSubtarget.h:169
llvm::GCNSubtarget::hasGetWaveIdInst
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:714
llvm::GCNSubtarget::HasNSAtoVMEMBug
bool HasNSAtoVMEMBug
Definition: GCNSubtarget.h:180
llvm::GCNSubtarget::HasScalarAtomics
bool HasScalarAtomics
Definition: GCNSubtarget.h:116
llvm::GCNSubtarget::ScalarFlatScratchInsts
bool ScalarFlatScratchInsts
Definition: GCNSubtarget.h:163
llvm::AMDGPU::IsaInfo::getMaxNumVGPRs
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:747
llvm::GCNSubtarget::hasPackedFP32Ops
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:833
llvm::GCNSubtarget::HalfRate64Ops
bool HalfRate64Ops
Definition: GCNSubtarget.h:69
llvm::GCNSubtarget::NegativeScratchOffsetBug
bool NegativeScratchOffsetBug
Definition: GCNSubtarget.h:108
llvm::GCNSubtarget::HasNSAClauseBug
bool HasNSAClauseBug
Definition: GCNSubtarget.h:181
llvm::GCNSubtarget::hasScalarCompareEq64
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:796
llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:547
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:219
llvm::GCNSubtarget::hasLDSFPAtomicAdd
bool hasLDSFPAtomicAdd() const
Definition: GCNSubtarget.h:808
llvm::GCNSubtarget::HasOffset3fBug
bool HasOffset3fBug
Definition: GCNSubtarget.h:182
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GCNSubtarget::UnalignedAccessMode
bool UnalignedAccessMode
Definition: GCNSubtarget.h:76
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:207
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:399
llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:498
llvm::GCNSubtarget::hasHalfRate64Ops
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:302
llvm::GCNSubtarget::hasGFX10_AEncoding
bool hasGFX10_AEncoding() const
Definition: GCNSubtarget.h:869
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:617
llvm::GCNSubtarget::HasSDWASdst
bool HasSDWASdst
Definition: GCNSubtarget.h:119
llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:601
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:118
llvm::GCNSubtarget::HasNoDataDepHazard
bool HasNoDataDepHazard
Definition: GCNSubtarget.h:158
llvm::GCNSubtarget::HasVGPRIndexMode
bool HasVGPRIndexMode
Definition: GCNSubtarget.h:114
llvm::GCNSubtarget::overrideSchedPolicy
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
Definition: AMDGPUSubtarget.cpp:705
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:85
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:263
llvm::GCNSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
llvm::GCNSubtarget::hasFlatScrRegister
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:542
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1360
llvm::GCNSubtarget::hasFastFMAF32
bool hasFastFMAF32() const
Definition: GCNSubtarget.h:298
llvm::GCNSubtarget::hasA16
bool hasA16() const
Definition: GCNSubtarget.h:853
llvm::GCNSubtarget::GFX10_3Insts
bool GFX10_3Insts
Definition: GCNSubtarget.h:105
llvm::GCNSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:752
llvm::GCNSubtarget::hasBFI
bool hasBFI() const
Definition: GCNSubtarget.h:332
llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition: GCNSubtarget.h:1025
llvm::GCNSubtarget::useVGPRIndexMode
bool useVGPRIndexMode() const
Definition: AMDGPUSubtarget.cpp:726
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:310
llvm::GCNSubtarget::EnableLoadStoreOpt
bool EnableLoadStoreOpt
Definition: GCNSubtarget.h:89
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:458
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:360
llvm::GCNSubtarget::HasSMemRealTime
bool HasSMemRealTime
Definition: GCNSubtarget.h:110
llvm::Legalizer
Definition: Legalizer.h:31
llvm::GCNSubtarget::hasUnalignedAccessMode
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:514
llvm::GCNSubtarget::hasScalarFlatScratchInsts
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:565
llvm::GCNSubtarget::getTargetLowering
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:215
llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:126
llvm::GCNSubtarget::hasSDWAOmod
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:642
llvm::AMDGPU::IsaInfo::getMaxNumSGPRs
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Definition: AMDGPUBaseInfo.cpp:641
llvm::GCNSubtarget::hasSMRDReadVALUDefHazard
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:421
llvm::GCNSubtarget::getOccupancyWithNumSGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using the given number of SGPRs (parameter SGPRs).
Definition: AMDGPUSubtarget.cpp:732
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::GCNSubtarget::getInlineAsmLowering
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:227
llvm::GCNSubtarget::HasArchitectedFlatScratch
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:164
b
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int b
Definition: README.txt:418
llvm::GCNSubtarget::hasR128A16
bool hasR128A16() const
Definition: GCNSubtarget.h:845
llvm::GCNSubtarget::getBaseMaxNumSGPRs
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
Definition: AMDGPUSubtarget.cpp:819
llvm::GCNSubtarget::hasVscnt
bool hasVscnt() const
Definition: GCNSubtarget.h:710
AMDGPUSubtarget.h
llvm::GCNSubtarget::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1149
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isXnackOnOrAny
bool isXnackOnOrAny() const
Definition: AMDGPUBaseInfo.h:101
llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition: SelectionDAGTargetInfo.h:31
llvm::GCNSubtarget::getMaxPrivateElementSize
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:271
llvm::GCNSubtarget::hasVOP3Literal
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:730
llvm::GCNSubtarget::SupportsXNACK
bool SupportsXNACK
Definition: GCNSubtarget.h:78
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::GCNSubtarget::hasFFBH
bool hasFFBH() const
Definition: GCNSubtarget.h:348
llvm::GCNSubtarget::hasSMemTimeInst
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:718
llvm::GCNSubtarget::hasReadVCCZBug
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:410
llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1130
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:891
llvm::GCNSubtarget::AutoWaitcntBeforeBarrier
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:74
llvm::GCNSubtarget::EnableTgSplit
bool EnableTgSplit
Definition: GCNSubtarget.h:84
llvm::GCNSubtarget::hasSMemRealTime
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:782
llvm::GCNSubtarget::hasMed3_16
bool hasMed3_16() const
Definition: GCNSubtarget.h:352
llvm::GCNSubtarget::getInstructionSelector
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:231
llvm::GCNSubtarget::isMesaGfxShader
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:634
llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:578
llvm::GCNSubtarget::hasGFX10_BEncoding
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:873
llvm::GCNSubtarget::TrapHandler
bool TrapHandler
Definition: GCNSubtarget.h:86
llvm::GCNSubtarget::hasPkFmacF16Inst
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:698
llvm::GCNSubtarget::TrapID
TrapID
Definition: GCNSubtarget.h:44
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:530
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::SIFrameLowering
Definition: SIFrameLowering.h:21
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
SIISelLowering.h
llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1095
llvm::GCNSubtarget::hasDot6Insts
bool hasDot6Insts() const
Definition: GCNSubtarget.h:686
llvm::GCNSubtarget::LDSBankCount
int LDSBankCount
Definition: GCNSubtarget.h:63
llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:440
llvm::GCNSubtarget::hasScalarAtomics
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:804
llvm::GCNSubtarget::hasUnalignedDSAccess
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:502
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:763
llvm::GCNSubtarget::MIMG_R128
bool MIMG_R128
Definition: GCNSubtarget.h:99
llvm::GCNSubtarget::hasExtendedImageInsts
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:841
llvm::GCNSubtarget::hasDot1Insts
bool hasDot1Insts() const
Definition: GCNSubtarget.h:666
llvm::GCNSubtarget::hasOffset3fBug
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:857
llvm::GCNSubtarget::hasVcmpxExecWARHazard
bool hasVcmpxExecWARHazard() const
Definition: GCNSubtarget.h:943
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:196
llvm::GCNSubtarget::getVGPREncodingGranule
unsigned getVGPREncodingGranule() const
Definition: GCNSubtarget.h:1073
llvm::GCNSubtarget::isWave64
bool isWave64() const
Definition: GCNSubtarget.h:1134
llvm::GCNSubtarget::HasLdsBranchVmemWARHazard
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:179
llvm::GCNSubtarget::EnableUnsafeDSOffsetFolding
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:90
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::GCNSubtarget::MaxWaveScratchSize
static const unsigned MaxWaveScratchSize
Definition: GCNSubtarget.h:198
llvm::GCNSubtarget::FMA
bool FMA
Definition: GCNSubtarget.h:98
llvm::GCNSubtarget::FullRate64Ops
bool FullRate64Ops
Definition: GCNSubtarget.h:70
llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:235
llvm::GCNSubtarget::AddNoCarryInsts
bool AddNoCarryInsts
Definition: GCNSubtarget.h:165
llvm::GCNSubtarget::hasNegativeUnalignedScratchOffsetBug
bool hasNegativeUnalignedScratchOffsetBug() const
Definition: GCNSubtarget.h:897
llvm::GCNSubtarget::usePRTStrictNull
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:486
llvm::GCNSubtarget::hasInstFwdPrefetchBug
bool hasInstFwdPrefetchBug() const
Definition: GCNSubtarget.h:939
llvm::GCNSubtarget::FastDenormalF32
bool FastDenormalF32
Definition: GCNSubtarget.h:68
llvm::GCNSubtarget::HasRegisterBanking
bool HasRegisterBanking
Definition: GCNSubtarget.h:156
llvm::GCNSubtarget::HasPackedTID
bool HasPackedTID
Definition: GCNSubtarget.h:171
llvm::GCNSubtarget::GFX10_BEncoding
bool GFX10_BEncoding
Definition: GCNSubtarget.h:133
llvm::RegisterBankInfo
Holds all the information related to register banks.
Definition: RegisterBankInfo.h:39
llvm::InstructionSelector
Provides the logic to select generic machine instructions.
Definition: InstructionSelector.h:423
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
llvm::AMDGPU::IsaInfo::getMinNumSGPRs
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:624
llvm::GCNSubtarget::HasUnpackedD16VMem
bool HasUnpackedD16VMem
Definition: GCNSubtarget.h:166
llvm::GCNSubtarget::getSGPRAllocGranule
unsigned getSGPRAllocGranule() const
Definition: GCNSubtarget.h:998
llvm::GCNSubtarget::hasImageGather4D16Bug
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:863
llvm::GCNSubtarget::hasPackedTID
bool hasPackedTID() const
Definition: GCNSubtarget.h:964
llvm::GCNSubtarget::getTrapHandlerAbi
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:384
llvm::GCNSubtarget::HasFmaMixInsts
bool HasFmaMixInsts
Definition: GCNSubtarget.h:112
llvm::GCNSubtarget::has64BitDPP
bool has64BitDPP() const
Definition: GCNSubtarget.h:829
llvm::GCNSubtarget::GFX90AInsts
bool GFX90AInsts
Definition: GCNSubtarget.h:103
llvm::GCNSubtarget::HasIntClamp
bool HasIntClamp
Definition: GCNSubtarget.h:111
llvm::GCNSubtarget::hasNegativeScratchOffsetBug
bool hasNegativeScratchOffsetBug() const
Definition: GCNSubtarget.h:895
llvm::GCNSubtarget::hasVMEMtoScalarWriteHazard
bool hasVMEMtoScalarWriteHazard() const
Definition: GCNSubtarget.h:927
llvm::GCNSubtarget::NSAMaxSize
unsigned NSAMaxSize
Definition: GCNSubtarget.h:131
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:625
llvm::GCNSubtarget::HasShaderCyclesRegister
bool HasShaderCyclesRegister
Definition: GCNSubtarget.h:155
llvm::GCNSubtarget::hasUnalignedBufferAccess
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:494
llvm::GCNSubtarget::ScalarizeGlobal
bool ScalarizeGlobal
Definition: GCNSubtarget.h:172
llvm::GCNSubtarget::LDSMisalignedBug
bool LDSMisalignedBug
Definition: GCNSubtarget.h:167
llvm::GCNSubtarget::getTotalNumVGPRs
unsigned getTotalNumVGPRs() const
Definition: GCNSubtarget.h:1078
llvm::GCNSubtarget::GCNSubtarget
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
Definition: AMDGPUSubtarget.cpp:183
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:480
llvm::GCNSubtarget::HasVMEMtoScalarWriteHazard
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:175
llvm::GCNSubtarget::hasLDSMisalignedBug
bool hasLDSMisalignedBug() const
Definition: GCNSubtarget.h:935
llvm::GCNSubtarget::hasMFMAInlineLiteralBug
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:901
llvm::GCNSubtarget::HasImageGather4D16Bug
bool HasImageGather4D16Bug
Definition: GCNSubtarget.h:185
llvm::GCNSubtarget::FeatureDisable
bool FeatureDisable
Definition: GCNSubtarget.h:188
llvm::GCNSubtarget::EnableXNACK
bool EnableXNACK
Definition: GCNSubtarget.h:82
llvm::GCNSubtarget::hasFlat
bool hasFlat() const
Definition: GCNSubtarget.h:314
llvm::GCNSubtarget::hasImageStoreD16Bug
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:861
llvm::GCNSubtarget::hasGlobalAddTidInsts
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:569
llvm::GCNSubtarget::hasSDWAScalar
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:646
llvm::SIRegisterInfo::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: SIRegisterInfo.h:295
llvm::GCNSubtarget::hasFmaakFmamkF32Insts
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:837
llvm::GCNSubtarget::SupportsSRAMECC
bool SupportsSRAMECC
Definition: GCNSubtarget.h:145
llvm::GCNSubtarget::EnableCuMode
bool EnableCuMode
Definition: GCNSubtarget.h:85
llvm::GCNSubtarget::HasDot6Insts
bool HasDot6Insts
Definition: GCNSubtarget.h:140
llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1089
llvm::GCNSubtarget::hasReadM0MovRelInterpHazard
bool hasReadM0MovRelInterpHazard() const
Definition: GCNSubtarget.h:914
llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:1013
llvm::GCNSubtarget::hasUnalignedScratchAccess
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:510
llvm::GCNSubtarget::hasDot7Insts
bool hasDot7Insts() const
Definition: GCNSubtarget.h:690
llvm::GCNSubtarget::TSInfo
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:190
llvm::GCNSubtarget::hasRFEHazards
bool hasRFEHazards() const
Definition: GCNSubtarget.h:431
llvm::GCNSubtarget::HasVcmpxExecWARHazard
bool HasVcmpxExecWARHazard
Definition: GCNSubtarget.h:178
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:959
llvm::GCNSubtarget::getOccupancyWithNumVGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using the given number of VGPRs (parameter VGPRs).
Definition: AMDGPUSubtarget.cpp:758
llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:605
llvm::GCNSubtarget::hasNoDataDepHazard
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:734
llvm::GCNSubtarget::enableEarlyIfConversion
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:769
llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:612
llvm::GCNSubtarget::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1161
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:267
llvm::GCNSubtarget::MaxPrivateElementSize
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:64
llvm::GCNSubtarget::hasBFE
bool hasBFE() const
Definition: GCNSubtarget.h:328
SIInstrInfo.h
llvm::GCNSubtarget::hasFMA
bool hasFMA() const
Definition: GCNSubtarget.h:368
llvm::GCNSubtarget::hasLdsBranchVmemWARHazard
bool hasLdsBranchVmemWARHazard() const
Definition: GCNSubtarget.h:947
llvm::GCNSubtarget::HasPkFmacF16Inst
bool HasPkFmacF16Inst
Definition: GCNSubtarget.h:143
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:72
llvm::GCNSubtarget::hasIntClamp
bool hasIntClamp() const
Definition: GCNSubtarget.h:282
llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:577
llvm::GCNSubtarget::hasDenormModeInst
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:454
llvm::GCNSubtarget::hasAtomicFaddInsts
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:702
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:546
llvm::GCNSubtarget::hasDot5Insts
bool hasDot5Insts() const
Definition: GCNSubtarget.h:682
llvm::GCNSubtarget::hasScalarStores
bool hasScalarStores() const
Definition: GCNSubtarget.h:800
llvm::GCNSubtarget::hasGFX10_3Insts
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:877
llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:710
llvm::GCNSubtarget::HasScalarStores
bool HasScalarStores
Definition: GCNSubtarget.h:115
llvm::GCNSubtarget::flatScratchIsPointer
bool flatScratchIsPointer() const
Definition: GCNSubtarget.h:983
llvm::GCNSubtarget::supportsGetDoorbellID
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:388
llvm::GCNSubtarget::hasDPPBroadcasts
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:817
llvm::GCNSubtarget::hasMadF16
bool hasMadF16() const
Definition: AMDGPUSubtarget.cpp:722
SIFrameLowering.h
llvm::GCNSubtarget::GFX7GFX8GFX9Insts
bool GFX7GFX8GFX9Insts
Definition: GCNSubtarget.h:106
llvm::GCNSubtarget::HasSDWAOmod
bool HasSDWAOmod
Definition: GCNSubtarget.h:117
llvm::GCNSubtarget::GFX10_AEncoding
bool GFX10_AEncoding
Definition: GCNSubtarget.h:132
llvm::GCNSubtarget::hasDPPWavefrontShifts
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:821
llvm::GCNSubtarget::NegativeUnalignedScratchOffsetBug
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:109
llvm::GCNSubtarget::SGPRInitBug
bool SGPRInitBug
Definition: GCNSubtarget.h:107
llvm::GCNSubtarget::HasVOP3Literal
bool HasVOP3Literal
Definition: GCNSubtarget.h:157
llvm::GCNSubtarget::hasDPP8
bool hasDPP8() const
Definition: GCNSubtarget.h:825
llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:582
llvm::GCNSubtarget::hasScalarPackInsts
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:376
llvm::GCNSubtarget::hasD16LoadStore
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:589
llvm::GCNSubtarget::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: GCNSubtarget.h:1138
llvm::GCNSubtarget::adjustSchedDependency
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
Definition: AMDGPUSubtarget.cpp:936
llvm::GCNSubtarget::hasDot4Insts
bool hasDot4Insts() const
Definition: GCNSubtarget.h:678
llvm::GCNSubtarget::hasNoSdstCMPX
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:706
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1019
llvm::GCNSubtarget::enableFlatScratch
bool enableFlatScratch() const
Definition: AMDGPUSubtarget.cpp:318
llvm::GCNSubtarget::getPostRAMutations
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation >> &Mutations) const override
Definition: AMDGPUSubtarget.cpp:1098
llvm::GCNSubtarget::hasFlatScratchSTMode
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:561
llvm::GCNSubtarget::hasFlatAddressSpace
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:538
llvm::GCNSubtarget::FlatForGlobal
bool FlatForGlobal
Definition: GCNSubtarget.h:73
llvm::GCNSubtarget::HasNSAEncoding
bool HasNSAEncoding
Definition: GCNSubtarget.h:130
llvm::GCNSubtarget::getReservedNumSGPRs
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
Definition: AMDGPUSubtarget.cpp:784
llvm::GCNSubtarget::GFX10Insts
bool GFX10Insts
Definition: GCNSubtarget.h:104
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::GCNSubtarget::getVGPRAllocGranule
unsigned getVGPRAllocGranule() const
Definition: GCNSubtarget.h:1068
llvm::GCNSubtarget::hasNSAtoVMEMBug
bool hasNSAtoVMEMBug() const
Definition: GCNSubtarget.h:951
llvm::GCNSubtarget::HasMAIInsts
bool HasMAIInsts
Definition: GCNSubtarget.h:142
llvm::GCNSubtarget::enableSubRegLiveness
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:758
llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used without restricting the occupancy to lower than WaveCount.
AMDGPUGenSubtargetInfo
llvm::GCNSubtarget::setScalarizeGlobalBehavior
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:762
llvm::GCNSubtarget::GFX9Insts
bool GFX9Insts
Definition: GCNSubtarget.h:102
llvm::GCNSubtarget::HasDot1Insts
bool HasDot1Insts
Definition: GCNSubtarget.h:135
llvm::GCNSubtarget::HasDPP
bool HasDPP
Definition: GCNSubtarget.h:122
llvm::GCNSubtarget::hasFullRate64Ops
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:306
llvm::GCNSubtarget::HasNoSdstCMPX
bool HasNoSdstCMPX
Definition: GCNSubtarget.h:151
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::GCNSubtarget::hasUnpackedD16VMem
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:629
llvm::GCNSubtarget::flatScratchIsArchitected
bool flatScratchIsArchitected() const
Definition: GCNSubtarget.h:989
llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:243
llvm::GCNSubtarget::EnableSRAMECC
bool EnableSRAMECC
Definition: GCNSubtarget.h:149
llvm::GCNSubtarget::getMinWavesPerEU
unsigned getMinWavesPerEU() const override
Definition: GCNSubtarget.h:1167
llvm::GCNSubtarget::HasDot5Insts
bool HasDot5Insts
Definition: GCNSubtarget.h:139
llvm::GCNSubtarget::HasAtomicFaddInsts
bool HasAtomicFaddInsts
Definition: GCNSubtarget.h:144
llvm::GCNSubtarget::useAA
bool useAA() const override
Definition: AMDGPUSubtarget.cpp:730
llvm::GCNSubtarget::hasDPP
bool hasDPP() const
Definition: GCNSubtarget.h:813
llvm::GCNSubtarget::HasInstFwdPrefetchBug
bool HasInstFwdPrefetchBug
Definition: GCNSubtarget.h:177
llvm::SITargetLowering
Definition: SIISelLowering.h:31
llvm::GCNSubtarget::HasSMEMtoVectorWriteHazard
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:176
llvm::GCNSubtarget::hasG16
bool hasG16() const
Definition: GCNSubtarget.h:855
llvm::GCNSubtarget::hasHardClauses
bool hasHardClauses() const
Definition: GCNSubtarget.h:957
llvm::GCNSubtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:340
llvm::GCNSubtarget::haveRoundOpsF64
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:474
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:603
llvm::SIInstrInfo::getRegisterInfo
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:176
llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:778
llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:730
llvm::GCNSubtarget::HasDPP8
bool HasDPP8
Definition: GCNSubtarget.h:123
llvm::GCNSubtarget::zeroesHigh16BitsOfDest
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
Definition: AMDGPUSubtarget.cpp:344
llvm::GCNSubtarget::EnableDS128
bool EnableDS128
Definition: GCNSubtarget.h:92
llvm::GCNSubtarget::HasMFMAInlineLiteralBug
bool HasMFMAInlineLiteralBug
Definition: GCNSubtarget.h:168
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition: MathExtras.h:225
llvm::GCNSubtarget::hasOnlyRevVALUShifts
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:320
llvm::GCNSubtarget::UnalignedScratchAccess
bool UnalignedScratchAccess
Definition: GCNSubtarget.h:75
llvm::GCNSubtarget::TrapID::LLVMAMDHSATrap
@ LLVMAMDHSATrap
llvm::GCNSubtarget::DumpCode
bool DumpCode
Definition: GCNSubtarget.h:94
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:258
llvm::GCNSubtarget::TrapHandlerAbi::AMDHSA
@ AMDHSA
llvm::GCNSubtarget::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs() const
Definition: GCNSubtarget.h:1083
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::GCNSubtarget::hasSwap
bool hasSwap() const
Definition: GCNSubtarget.h:372
llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:526
llvm::GCNSubtarget::FlatScratchInsts
bool FlatScratchInsts
Definition: GCNSubtarget.h:162
AMDGPUCallLowering.h
llvm::GCNSubtarget::HasMovrel
bool HasMovrel
Definition: GCNSubtarget.h:113
llvm::GCNSubtarget::HasDot7Insts
bool HasDot7Insts
Definition: GCNSubtarget.h:141
SelectionDAGTargetInfo.h
llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:534
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:323
llvm::GCNSubtarget::hasMergedShaders
bool hasMergedShaders() const
Definition: GCNSubtarget.h:993
llvm::GCNSubtarget::FP64
bool FP64
Definition: GCNSubtarget.h:97
llvm::GCNSubtarget::hasFFBL
bool hasFFBL() const
Definition: GCNSubtarget.h:344
llvm::GCNSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:248
llvm::GCNSubtarget::hasApertureRegs
bool hasApertureRegs() const
Definition: GCNSubtarget.h:518
llvm::GCNSubtarget::~GCNSubtarget
~GCNSubtarget() override
llvm::GCNSubtarget::getSGPREncodingGranule
unsigned getSGPREncodingGranule() const
Definition: GCNSubtarget.h:1003
llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:572
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:593
llvm::GCNSubtarget::enableSIScheduler
bool enableSIScheduler() const
Definition: GCNSubtarget.h:883
llvm::AMDGPU::IsaInfo::getMinNumVGPRs
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:736
llvm::GCNSubtarget::getInstrItineraryData
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:252
llvm::GCNSubtarget::CIInsts
bool CIInsts
Definition: GCNSubtarget.h:100
llvm::GCNSubtarget::HasVscnt
bool HasVscnt
Definition: GCNSubtarget.h:152
llvm::GCNSubtarget::FastFMAF32
bool FastFMAF32
Definition: GCNSubtarget.h:67
llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:738
llvm::GCNSubtarget::hasReadM0SendMsgHazard
bool hasReadM0SendMsgHazard() const
Definition: GCNSubtarget.h:918
llvm::GCNSubtarget::Has64BitDPP
bool Has64BitDPP
Definition: GCNSubtarget.h:124
llvm::GCNSubtarget::hasMIMG_R128
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:290
llvm::LegalizerInfo
Definition: LegalizerInfo.h:1108
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumVGPRs
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
Definition: AMDGPUMetadata.h:255
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::GCNSubtarget::hasSMEMtoVectorWriteHazard
bool hasSMEMtoVectorWriteHazard() const
Definition: GCNSubtarget.h:931
llvm::GCNSubtarget::HasApertureRegs
bool HasApertureRegs
Definition: GCNSubtarget.h:77
llvm::GCNSubtarget::hasMin3Max3_16
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:356
llvm::GCNSubtarget::HasSMemTimeInst
bool HasSMemTimeInst
Definition: GCNSubtarget.h:154
llvm::AMDGPUSubtarget::Generation
Generation
Definition: AMDGPUSubtarget.h:31
llvm::GCNSubtarget::createFillMFMAShadowMutation
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
Definition: AMDGPUSubtarget.cpp:1104
llvm::MachineSchedPolicy
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Definition: MachineScheduler.h:174
llvm::GCNSubtarget::TargetTriple
Triple TargetTriple
Definition: GCNSubtarget.h:59
llvm::GCNSubtarget::HasSDWAMac
bool HasSDWAMac
Definition: GCNSubtarget.h:120
llvm::GCNSubtarget::hasHWFP64
bool hasHWFP64() const
Definition: GCNSubtarget.h:294
llvm::GCNSubtarget::getTotalNumSGPRs
unsigned getTotalNumSGPRs() const
Definition: GCNSubtarget.h:1008
llvm::GCNSubtarget::has12DWordStoreHazard
bool has12DWordStoreHazard() const
Definition: GCNSubtarget.h:905
llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition: AMDGPUSubtarget.h:200
llvm::GCNSubtarget::hasVcmpxPermlaneHazard
bool hasVcmpxPermlaneHazard() const
Definition: GCNSubtarget.h:923
llvm::GCNSubtarget::hasSDWAOutModsVOPC
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:658
llvm::CallLowering
Definition: CallLowering.h:43
llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:806
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:395
llvm::GCNSubtarget::hasMad64_32
bool hasMad64_32() const
Definition: GCNSubtarget.h:638
llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:695
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::GCNSubtarget::getBaseMaxNumVGPRs
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
Definition: AMDGPUSubtarget.cpp:896
llvm::GCNSubtarget::FlatInstOffsets
bool FlatInstOffsets
Definition: GCNSubtarget.h:160
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:694
llvm::GCNSubtarget::HasG16
bool HasG16
Definition: GCNSubtarget.h:129
llvm::GCNSubtarget::hasUsableDivScaleConditionOutput
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:404
llvm::GCNSubtarget::hasAtomicCSub
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:573
llvm::GCNSubtarget::HasGFX10A16
bool HasGFX10A16
Definition: GCNSubtarget.h:128
llvm::GCNSubtarget::HasR128A16
bool HasR128A16
Definition: GCNSubtarget.h:127
llvm::GCNSubtarget::getCallLowering
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:223