LLVM  14.0.0git
AMDGPUTargetMachine.cpp
Go to the documentation of this file.
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for R600 and SI GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUExportClustering.h"
19 #include "AMDGPUMacroFusion.h"
20 #include "AMDGPUTargetObjectFile.h"
22 #include "GCNIterativeScheduler.h"
23 #include "GCNSchedStrategy.h"
24 #include "R600MachineScheduler.h"
25 #include "SIMachineFunctionInfo.h"
26 #include "SIMachineScheduler.h"
35 #include "llvm/CodeGen/Passes.h"
39 #include "llvm/IR/PassManager.h"
40 #include "llvm/InitializePasses.h"
43 #include "llvm/Transforms/IPO.h"
48 #include "llvm/Transforms/Scalar.h"
51 #include "llvm/Transforms/Utils.h"
54 
55 using namespace llvm;
56 
57 namespace {
58 class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
59 public:
60  SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
61  : RegisterRegAllocBase(N, D, C) {}
62 };
63 
64 class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
65 public:
66  VGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
67  : RegisterRegAllocBase(N, D, C) {}
68 };
69 
70 static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
71  const TargetRegisterClass &RC) {
72  return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
73 }
74 
75 static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
76  const TargetRegisterClass &RC) {
77  return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
78 }
79 
80 
81 /// -{sgpr|vgpr}-regalloc=... command line option.
82 static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
83 
84 /// A dummy default pass factory indicates whether the register allocator is
85 /// overridden on the command line.
86 static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
87 static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
88 
89 static SGPRRegisterRegAlloc
90 defaultSGPRRegAlloc("default",
91  "pick SGPR register allocator based on -O option",
93 
94 static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,
96 SGPRRegAlloc("sgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
97  cl::desc("Register allocator to use for SGPRs"));
98 
99 static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
101 VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
102  cl::desc("Register allocator to use for VGPRs"));
103 
104 
105 static void initializeDefaultSGPRRegisterAllocatorOnce() {
106  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
107 
108  if (!Ctor) {
109  Ctor = SGPRRegAlloc;
110  SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);
111  }
112 }
113 
114 static void initializeDefaultVGPRRegisterAllocatorOnce() {
115  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
116 
117  if (!Ctor) {
118  Ctor = VGPRRegAlloc;
119  VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);
120  }
121 }
122 
123 static FunctionPass *createBasicSGPRRegisterAllocator() {
124  return createBasicRegisterAllocator(onlyAllocateSGPRs);
125 }
126 
127 static FunctionPass *createGreedySGPRRegisterAllocator() {
128  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
129 }
130 
131 static FunctionPass *createFastSGPRRegisterAllocator() {
132  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
133 }
134 
135 static FunctionPass *createBasicVGPRRegisterAllocator() {
136  return createBasicRegisterAllocator(onlyAllocateVGPRs);
137 }
138 
139 static FunctionPass *createGreedyVGPRRegisterAllocator() {
140  return createGreedyRegisterAllocator(onlyAllocateVGPRs);
141 }
142 
143 static FunctionPass *createFastVGPRRegisterAllocator() {
144  return createFastRegisterAllocator(onlyAllocateVGPRs, true);
145 }
146 
147 static SGPRRegisterRegAlloc basicRegAllocSGPR(
148  "basic", "basic register allocator", createBasicSGPRRegisterAllocator);
149 static SGPRRegisterRegAlloc greedyRegAllocSGPR(
150  "greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);
151 
152 static SGPRRegisterRegAlloc fastRegAllocSGPR(
153  "fast", "fast register allocator", createFastSGPRRegisterAllocator);
154 
155 
156 static VGPRRegisterRegAlloc basicRegAllocVGPR(
157  "basic", "basic register allocator", createBasicVGPRRegisterAllocator);
158 static VGPRRegisterRegAlloc greedyRegAllocVGPR(
159  "greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);
160 
161 static VGPRRegisterRegAlloc fastRegAllocVGPR(
162  "fast", "fast register allocator", createFastVGPRRegisterAllocator);
163 }
164 
165 
167  "r600-ir-structurize",
168  cl::desc("Use StructurizeCFG IR pass"),
169  cl::init(true));
170 
172  "amdgpu-sroa",
173  cl::desc("Run SROA after promote alloca pass"),
175  cl::init(true));
176 
177 static cl::opt<bool>
178 EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
179  cl::desc("Run early if-conversion"),
180  cl::init(false));
181 
182 static cl::opt<bool>
183 OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
184  cl::desc("Run pre-RA exec mask optimizations"),
185  cl::init(true));
186 
188  "r600-if-convert",
189  cl::desc("Use if conversion pass"),
191  cl::init(true));
192 
193 // Option to disable vectorizer for tests.
195  "amdgpu-load-store-vectorizer",
196  cl::desc("Enable load store vectorizer"),
197  cl::init(true),
198  cl::Hidden);
199 
200 // Option to control global loads scalarization
202  "amdgpu-scalarize-global-loads",
203  cl::desc("Enable global load scalarization"),
204  cl::init(true),
205  cl::Hidden);
206 
207 // Option to run internalize pass.
209  "amdgpu-internalize-symbols",
210  cl::desc("Enable elimination of non-kernel functions and unused globals"),
211  cl::init(false),
212  cl::Hidden);
213 
214 // Option to inline all early.
216  "amdgpu-early-inline-all",
217  cl::desc("Inline all functions early"),
218  cl::init(false),
219  cl::Hidden);
220 
222  "amdgpu-sdwa-peephole",
223  cl::desc("Enable SDWA peepholer"),
224  cl::init(true));
225 
227  "amdgpu-dpp-combine",
228  cl::desc("Enable DPP combiner"),
229  cl::init(true));
230 
231 // Enable address space based alias analysis
232 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
233  cl::desc("Enable AMDGPU Alias Analysis"),
234  cl::init(true));
235 
236 // Option to run late CFG structurizer
238  "amdgpu-late-structurize",
239  cl::desc("Enable late CFG structurization"),
241  cl::Hidden);
242 
244  "amdgpu-function-calls",
245  cl::desc("Enable AMDGPU function call support"),
247  cl::init(true),
248  cl::Hidden);
249 
251  "amdgpu-fixed-function-abi",
252  cl::desc("Enable all implicit function arguments"),
254  cl::init(false),
255  cl::Hidden);
256 
257 // Enable lib calls simplifications
259  "amdgpu-simplify-libcall",
260  cl::desc("Enable amdgpu library simplifications"),
261  cl::init(true),
262  cl::Hidden);
263 
265  "amdgpu-ir-lower-kernel-arguments",
266  cl::desc("Lower kernel argument loads in IR pass"),
267  cl::init(true),
268  cl::Hidden);
269 
271  "amdgpu-reassign-regs",
272  cl::desc("Enable register reassign optimizations on gfx10+"),
273  cl::init(true),
274  cl::Hidden);
275 
277  "amdgpu-opt-vgpr-liverange",
278  cl::desc("Enable VGPR liverange optimizations for if-else structure"),
279  cl::init(true), cl::Hidden);
280 
281 // Enable atomic optimization
283  "amdgpu-atomic-optimizations",
284  cl::desc("Enable atomic optimizations"),
285  cl::init(false),
286  cl::Hidden);
287 
288 // Enable Mode register optimization
290  "amdgpu-mode-register",
291  cl::desc("Enable mode register pass"),
292  cl::init(true),
293  cl::Hidden);
294 
295 // Option is used in lit tests to prevent deadcoding of patterns inspected.
296 static cl::opt<bool>
297 EnableDCEInRA("amdgpu-dce-in-ra",
298  cl::init(true), cl::Hidden,
299  cl::desc("Enable machine DCE inside regalloc"));
300 
302  "amdgpu-scalar-ir-passes",
303  cl::desc("Enable scalar IR passes"),
304  cl::init(true),
305  cl::Hidden);
306 
308  "amdgpu-enable-structurizer-workarounds",
309  cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
310  cl::Hidden);
311 
313  "amdgpu-enable-lds-replace-with-pointer",
314  cl::desc("Enable LDS replace with pointer pass"), cl::init(false),
315  cl::Hidden);
316 
318  "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
320  cl::Hidden);
321 
323  "amdgpu-enable-pre-ra-optimizations",
324  cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
325  cl::Hidden);
326 
328  // Register the target
331 
397 }
398 
399 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
400  return std::make_unique<AMDGPUTargetObjectFile>();
401 }
402 
404  return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
405 }
406 
408  return new SIScheduleDAGMI(C);
409 }
410 
411 static ScheduleDAGInstrs *
413  ScheduleDAGMILive *DAG =
414  new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
418  return DAG;
419 }
420 
421 static ScheduleDAGInstrs *
423  auto DAG = new GCNIterativeScheduler(C,
425  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
426  return DAG;
427 }
428 
430  return new GCNIterativeScheduler(C,
432 }
433 
434 static ScheduleDAGInstrs *
436  auto DAG = new GCNIterativeScheduler(C,
438  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
439  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
440  return DAG;
441 }
442 
444 R600SchedRegistry("r600", "Run R600's custom scheduler",
446 
448 SISchedRegistry("si", "Run SI's custom scheduler",
450 
452 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
453  "Run GCN scheduler to maximize occupancy",
455 
457 IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
458  "Run GCN scheduler to maximize occupancy (experimental)",
460 
462 GCNMinRegSchedRegistry("gcn-minreg",
463  "Run GCN iterative scheduler for minimal register usage (experimental)",
465 
467 GCNILPSchedRegistry("gcn-ilp",
468  "Run GCN iterative scheduler for ILP scheduling (experimental)",
470 
471 static StringRef computeDataLayout(const Triple &TT) {
472  if (TT.getArch() == Triple::r600) {
473  // 32-bit pointers.
474  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
475  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
476  }
477 
478  // 32-bit private, local, and region pointers. 64-bit global, constant and
479  // flat, non-integral buffer fat pointers.
480  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
481  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
482  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
483  "-ni:7";
484 }
485 
487 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
488  if (!GPU.empty())
489  return GPU;
490 
491  // Need to default to a target with flat support for HSA.
492  if (TT.getArch() == Triple::amdgcn)
493  return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
494 
495  return "r600";
496 }
497 
499  // The AMDGPU toolchain only supports generating shared objects, so we
500  // must always use PIC.
501  return Reloc::PIC_;
502 }
503 
505  StringRef CPU, StringRef FS,
509  CodeGenOpt::Level OptLevel)
512  getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
513  TLOF(createTLOF(getTargetTriple())) {
514  initAsmInfo();
515  if (TT.getArch() == Triple::amdgcn) {
516  if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize64"))
518  else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
520  }
521 }
522 
527 
529 
531  Attribute GPUAttr = F.getFnAttribute("target-cpu");
532  return GPUAttr.isValid() ? GPUAttr.getValueAsString() : getTargetCPU();
533 }
534 
536  Attribute FSAttr = F.getFnAttribute("target-features");
537 
538  return FSAttr.isValid() ? FSAttr.getValueAsString()
540 }
541 
542 /// Predicate for Internalize pass.
543 static bool mustPreserveGV(const GlobalValue &GV) {
544  if (const Function *F = dyn_cast<Function>(&GV))
545  return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
546 
548  return !GV.use_empty();
549 }
550 
552  Builder.DivergentTarget = true;
553 
554  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
555  bool Internalize = InternalizeSymbols;
556  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
557  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
558  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
559 
560  if (EnableFunctionCalls) {
561  delete Builder.Inliner;
563  }
564 
565  Builder.addExtension(
567  [Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &,
569  if (AMDGPUAA) {
572  }
575  if (Internalize)
578  if (Internalize)
579  PM.add(createGlobalDCEPass());
580  if (EarlyInline)
582  });
583 
584  Builder.addExtension(
586  [AMDGPUAA, LibCallSimplify, this](const PassManagerBuilder &,
588  if (AMDGPUAA) {
591  }
594  if (LibCallSimplify)
596  });
597 
598  Builder.addExtension(
600  [EnableOpt](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
601  // Add infer address spaces pass to the opt pipeline after inlining
602  // but before SROA to increase SROA opportunities.
604 
605  // This should run after inlining to have any chance of doing anything,
606  // and before other cleanup optimizations.
608 
609  // Promote alloca to vector before SROA and loop unroll. If we manage
610  // to eliminate allocas before unroll we may choose to unroll less.
611  if (EnableOpt)
613  });
614 }
615 
618 }
619 
624  if (PassName == "amdgpu-propagate-attributes-late") {
626  return true;
627  }
628  if (PassName == "amdgpu-unify-metadata") {
630  return true;
631  }
632  if (PassName == "amdgpu-printf-runtime-binding") {
634  return true;
635  }
636  if (PassName == "amdgpu-always-inline") {
638  return true;
639  }
640  if (PassName == "amdgpu-replace-lds-use-with-pointer") {
642  return true;
643  }
644  if (PassName == "amdgpu-lower-module-lds") {
646  return true;
647  }
648  return false;
649  });
653  if (PassName == "amdgpu-simplifylib") {
655  return true;
656  }
657  if (PassName == "amdgpu-usenative") {
659  return true;
660  }
661  if (PassName == "amdgpu-promote-alloca") {
662  PM.addPass(AMDGPUPromoteAllocaPass(*this));
663  return true;
664  }
665  if (PassName == "amdgpu-promote-alloca-to-vector") {
667  return true;
668  }
669  if (PassName == "amdgpu-lower-kernel-attributes") {
671  return true;
672  }
673  if (PassName == "amdgpu-propagate-attributes-early") {
675  return true;
676  }
677  return false;
678  });
679 
681  FAM.registerPass([&] { return AMDGPUAA(); });
682  });
683 
684  PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
685  if (AAName == "amdgpu-aa") {
687  return true;
688  }
689  return false;
690  });
691 
700  });
701 
705  return;
706 
709 
710  if (InternalizeSymbols) {
712  }
714  if (InternalizeSymbols) {
715  PM.addPass(GlobalDCEPass());
716  }
719  });
720 
724  return;
725 
727 
728  // Add infer address spaces pass to the opt pipeline after inlining
729  // but before SROA to increase SROA opportunities.
731 
732  // This should run after inlining to have any chance of doing
733  // anything, and before other cleanup optimizations.
735 
736  if (Level != OptimizationLevel::O0) {
737  // Promote alloca to vector before SROA and loop unroll. If we
738  // manage to eliminate allocas before unroll we may choose to unroll
739  // less.
741  }
742 
744  });
745 }
746 
747 //===----------------------------------------------------------------------===//
748 // R600 Target Machine (R600 -> Cayman)
749 //===----------------------------------------------------------------------===//
750 
752  StringRef CPU, StringRef FS,
756  CodeGenOpt::Level OL, bool JIT)
757  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
759 
760  // Override the default since calls aren't supported for r600.
761  if (EnableFunctionCalls &&
762  EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
763  EnableFunctionCalls = false;
764 }
765 
767  const Function &F) const {
768  StringRef GPU = getGPUName(F);
770 
771  SmallString<128> SubtargetKey(GPU);
772  SubtargetKey.append(FS);
773 
774  auto &I = SubtargetMap[SubtargetKey];
775  if (!I) {
776  // This needs to be done before we create a new subtarget since any
777  // creation will depend on the TM and the code generation flags on the
778  // function that reside in TargetOptions.
780  I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
781  }
782 
783  return I.get();
784 }
785 
786 int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
787  return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
788  AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
789  AddrSpace == AMDGPUAS::REGION_ADDRESS)
790  ? -1
791  : 0;
792 }
793 
795  unsigned DestAS) const {
796  return AMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
798 }
799 
801  const auto *LD = dyn_cast<LoadInst>(V);
802  if (!LD)
804 
805  // It must be a generic pointer loaded.
806  assert(V->getType()->isPointerTy() &&
808 
809  const auto *Ptr = LD->getPointerOperand();
810  if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
812  // For a generic pointer loaded from the constant memory, it could be assumed
813  // as a global pointer since the constant memory is only populated on the
814  // host side. As implied by the offload programming model, only global
815  // pointers could be referenced on the host side.
817 }
818 
821  return TargetTransformInfo(R600TTIImpl(this, F));
822 }
823 
824 //===----------------------------------------------------------------------===//
825 // GCN Target Machine (SI+)
826 //===----------------------------------------------------------------------===//
827 
829  StringRef CPU, StringRef FS,
833  CodeGenOpt::Level OL, bool JIT)
834  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
835 
837  StringRef GPU = getGPUName(F);
839 
840  SmallString<128> SubtargetKey(GPU);
841  SubtargetKey.append(FS);
842 
843  auto &I = SubtargetMap[SubtargetKey];
844  if (!I) {
845  // This needs to be done before we create a new subtarget since any
846  // creation will depend on the TM and the code generation flags on the
847  // function that reside in TargetOptions.
849  I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
850  }
851 
852  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
853 
854  return I.get();
855 }
856 
859  return TargetTransformInfo(GCNTTIImpl(this, F));
860 }
861 
862 //===----------------------------------------------------------------------===//
863 // AMDGPU Pass Setup
864 //===----------------------------------------------------------------------===//
865 
866 namespace {
867 
868 class AMDGPUPassConfig : public TargetPassConfig {
869 public:
870  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
871  : TargetPassConfig(TM, PM) {
872  // Exceptions and StackMaps are not supported, so these passes will never do
873  // anything.
874  disablePass(&StackMapLivenessID);
875  disablePass(&FuncletLayoutID);
876  // Garbage collection is not supported.
877  disablePass(&GCLoweringID);
878  disablePass(&ShadowStackGCLoweringID);
879  }
880 
881  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
882  return getTM<AMDGPUTargetMachine>();
883  }
884 
886  createMachineScheduler(MachineSchedContext *C) const override {
889  return DAG;
890  }
891 
892  void addEarlyCSEOrGVNPass();
893  void addStraightLineScalarOptimizationPasses();
894  void addIRPasses() override;
895  void addCodeGenPrepare() override;
896  bool addPreISel() override;
897  bool addInstSelector() override;
898  bool addGCPasses() override;
899 
900  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
901 
902  /// Check if a pass is enabled given \p Opt option. The option always
903  /// overrides defaults if explicitely used. Otherwise its default will
904  /// be used given that a pass shall work at an optimization \p Level
905  /// minimum.
906  bool isPassEnabled(const cl::opt<bool> &Opt,
908  if (Opt.getNumOccurrences())
909  return Opt;
910  if (TM->getOptLevel() < Level)
911  return false;
912  return Opt;
913  }
914 };
915 
916 std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig() const {
917  return getStandardCSEConfigForOpt(TM->getOptLevel());
918 }
919 
920 class R600PassConfig final : public AMDGPUPassConfig {
921 public:
922  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
923  : AMDGPUPassConfig(TM, PM) {}
924 
925  ScheduleDAGInstrs *createMachineScheduler(
926  MachineSchedContext *C) const override {
928  }
929 
930  bool addPreISel() override;
931  bool addInstSelector() override;
932  void addPreRegAlloc() override;
933  void addPreSched2() override;
934  void addPreEmitPass() override;
935 };
936 
937 class GCNPassConfig final : public AMDGPUPassConfig {
938 public:
939  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
940  : AMDGPUPassConfig(TM, PM) {
941  // It is necessary to know the register usage of the entire call graph. We
942  // allow calls without EnableAMDGPUFunctionCalls if they are marked
943  // noinline, so this is always required.
944  setRequiresCodeGenSCCOrder(true);
945  }
946 
947  GCNTargetMachine &getGCNTargetMachine() const {
948  return getTM<GCNTargetMachine>();
949  }
950 
952  createMachineScheduler(MachineSchedContext *C) const override;
953 
954  bool addPreISel() override;
955  void addMachineSSAOptimization() override;
956  bool addILPOpts() override;
957  bool addInstSelector() override;
958  bool addIRTranslator() override;
959  void addPreLegalizeMachineIR() override;
960  bool addLegalizeMachineIR() override;
961  void addPreRegBankSelect() override;
962  bool addRegBankSelect() override;
963  void addPreGlobalInstructionSelect() override;
964  bool addGlobalInstructionSelect() override;
965  void addFastRegAlloc() override;
966  void addOptimizedRegAlloc() override;
967 
968  FunctionPass *createSGPRAllocPass(bool Optimized);
969  FunctionPass *createVGPRAllocPass(bool Optimized);
970  FunctionPass *createRegAllocPass(bool Optimized) override;
971 
972  bool addRegAssignAndRewriteFast() override;
973  bool addRegAssignAndRewriteOptimized() override;
974 
975  void addPreRegAlloc() override;
976  bool addPreRewrite() override;
977  void addPostRegAlloc() override;
978  void addPreSched2() override;
979  void addPreEmitPass() override;
980 };
981 
982 } // end anonymous namespace
983 
984 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
985  if (getOptLevel() == CodeGenOpt::Aggressive)
986  addPass(createGVNPass());
987  else
988  addPass(createEarlyCSEPass());
989 }
990 
991 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
992  addPass(createLICMPass());
995  // ReassociateGEPs exposes more opportunites for SLSR. See
996  // the example in reassociate-geps-and-slsr.ll.
998  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
999  // EarlyCSE can reuse.
1000  addEarlyCSEOrGVNPass();
1001  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
1002  addPass(createNaryReassociatePass());
1003  // NaryReassociate on GEPs creates redundant common expressions, so run
1004  // EarlyCSE after it.
1005  addPass(createEarlyCSEPass());
1006 }
1007 
1008 void AMDGPUPassConfig::addIRPasses() {
1009  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
1010 
1011  // There is no reason to run these.
1012  disablePass(&StackMapLivenessID);
1013  disablePass(&FuncletLayoutID);
1014  disablePass(&PatchableFunctionID);
1015 
1017 
1018  // This must occur before inlining, as the inliner will not look through
1019  // bitcast calls.
1021 
1022  // A call to propagate attributes pass in the backend in case opt was not run.
1024 
1026 
1027  // Function calls are not supported, so make sure we inline everything.
1028  addPass(createAMDGPUAlwaysInlinePass());
1029  addPass(createAlwaysInlinerLegacyPass());
1030  // We need to add the barrier noop pass, otherwise adding the function
1031  // inlining pass will cause all of the PassConfigs passes to be run
1032  // one function at a time, which means if we have a nodule with two
1033  // functions, then we will generate code for the first function
1034  // without ever running any passes on the second.
1035  addPass(createBarrierNoopPass());
1036 
1037  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
1038  if (TM.getTargetTriple().getArch() == Triple::r600)
1040 
1041  // Replace OpenCL enqueued block function pointers with global variables.
1043 
1044  // Can increase LDS used by kernel so runs before PromoteAlloca
1045  if (EnableLowerModuleLDS) {
1046  // The pass "amdgpu-replace-lds-use-with-pointer" need to be run before the
1047  // pass "amdgpu-lower-module-lds", and also it required to be run only if
1048  // "amdgpu-lower-module-lds" pass is enabled.
1051 
1052  addPass(createAMDGPULowerModuleLDSPass());
1053  }
1054 
1055  if (TM.getOptLevel() > CodeGenOpt::None)
1056  addPass(createInferAddressSpacesPass());
1057 
1058  addPass(createAtomicExpandPass());
1059 
1060  if (TM.getOptLevel() > CodeGenOpt::None) {
1061  addPass(createAMDGPUPromoteAlloca());
1062 
1063  if (EnableSROA)
1064  addPass(createSROAPass());
1065  if (isPassEnabled(EnableScalarIRPasses))
1066  addStraightLineScalarOptimizationPasses();
1067 
1069  addPass(createAMDGPUAAWrapperPass());
1070  addPass(createExternalAAWrapperPass([](Pass &P, Function &,
1071  AAResults &AAR) {
1072  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
1073  AAR.addAAResult(WrapperPass->getResult());
1074  }));
1075  }
1076 
1077  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
1078  // TODO: May want to move later or split into an early and late one.
1079  addPass(createAMDGPUCodeGenPreparePass());
1080  }
1081  }
1082 
1084 
1085  // EarlyCSE is not always strong enough to clean up what LSR produces. For
1086  // example, GVN can combine
1087  //
1088  // %0 = add %a, %b
1089  // %1 = add %b, %a
1090  //
1091  // and
1092  //
1093  // %0 = shl nsw %a, 2
1094  // %1 = shl %a, 2
1095  //
1096  // but EarlyCSE can do neither of them.
1097  if (isPassEnabled(EnableScalarIRPasses))
1098  addEarlyCSEOrGVNPass();
1099 }
1100 
1101 void AMDGPUPassConfig::addCodeGenPrepare() {
1102  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
1104 
1105  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
1108 
1110 
1111  if (isPassEnabled(EnableLoadStoreVectorizer))
1112  addPass(createLoadStoreVectorizerPass());
1113 
1114  // LowerSwitch pass may introduce unreachable blocks that can
1115  // cause unexpected behavior for subsequent passes. Placing it
1116  // here seems better that these blocks would get cleaned up by
1117  // UnreachableBlockElim inserted next in the pass flow.
1118  addPass(createLowerSwitchPass());
1119 }
1120 
1121 bool AMDGPUPassConfig::addPreISel() {
1122  if (TM->getOptLevel() > CodeGenOpt::None)
1123  addPass(createFlattenCFGPass());
1124  return false;
1125 }
1126 
1127 bool AMDGPUPassConfig::addInstSelector() {
1128  // Defer the verifier until FinalizeISel.
1129  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()), false);
1130  return false;
1131 }
1132 
1133 bool AMDGPUPassConfig::addGCPasses() {
1134  // Do nothing. GC is not supported.
1135  return false;
1136 }
1137 
1138 //===----------------------------------------------------------------------===//
1139 // R600 Pass Setup
1140 //===----------------------------------------------------------------------===//
1141 
1142 bool R600PassConfig::addPreISel() {
1143  AMDGPUPassConfig::addPreISel();
1144 
1146  addPass(createStructurizeCFGPass());
1147  return false;
1148 }
1149 
1150 bool R600PassConfig::addInstSelector() {
1151  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
1152  return false;
1153 }
1154 
1155 void R600PassConfig::addPreRegAlloc() {
1156  addPass(createR600VectorRegMerger());
1157 }
1158 
1159 void R600PassConfig::addPreSched2() {
1160  addPass(createR600EmitClauseMarkers(), false);
1161  if (EnableR600IfConvert)
1162  addPass(&IfConverterID, false);
1163  addPass(createR600ClauseMergePass(), false);
1164 }
1165 
1166 void R600PassConfig::addPreEmitPass() {
1167  addPass(createAMDGPUCFGStructurizerPass(), false);
1168  addPass(createR600ExpandSpecialInstrsPass(), false);
1169  addPass(&FinalizeMachineBundlesID, false);
1170  addPass(createR600Packetizer(), false);
1171  addPass(createR600ControlFlowFinalizer(), false);
1172 }
1173 
1175  return new R600PassConfig(*this, PM);
1176 }
1177 
1178 //===----------------------------------------------------------------------===//
1179 // GCN Pass Setup
1180 //===----------------------------------------------------------------------===//
1181 
1182 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
1183  MachineSchedContext *C) const {
1184  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1185  if (ST.enableSIScheduler())
1186  return createSIMachineScheduler(C);
1188 }
1189 
1190 bool GCNPassConfig::addPreISel() {
1191  AMDGPUPassConfig::addPreISel();
1192 
1193  if (TM->getOptLevel() > CodeGenOpt::None)
1195 
1196  if (isPassEnabled(EnableAtomicOptimizations, CodeGenOpt::Less)) {
1198  }
1199 
1200  if (TM->getOptLevel() > CodeGenOpt::None)
1201  addPass(createSinkingPass());
1202 
1203  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
1204  // regions formed by them.
1206  if (!LateCFGStructurize) {
1208  addPass(createFixIrreduciblePass());
1209  addPass(createUnifyLoopExitsPass());
1210  }
1211  addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions
1212  }
1214  if (!LateCFGStructurize) {
1216  }
1217  addPass(createLCSSAPass());
1218 
1219  if (TM->getOptLevel() > CodeGenOpt::Less)
1220  addPass(&AMDGPUPerfHintAnalysisID);
1221 
1222  return false;
1223 }
1224 
1225 void GCNPassConfig::addMachineSSAOptimization() {
1227 
1228  // We want to fold operands after PeepholeOptimizer has run (or as part of
1229  // it), because it will eliminate extra copies making it easier to fold the
1230  // real source operand. We want to eliminate dead instructions after, so that
1231  // we see fewer uses of the copies. We then need to clean up the dead
1232  // instructions leftover after the operands are folded as well.
1233  //
1234  // XXX - Can we get away without running DeadMachineInstructionElim again?
1235  addPass(&SIFoldOperandsID);
1236  if (EnableDPPCombine)
1237  addPass(&GCNDPPCombineID);
1238  addPass(&SILoadStoreOptimizerID);
1239  if (isPassEnabled(EnableSDWAPeephole)) {
1240  addPass(&SIPeepholeSDWAID);
1241  addPass(&EarlyMachineLICMID);
1242  addPass(&MachineCSEID);
1243  addPass(&SIFoldOperandsID);
1244  }
1245  addPass(&DeadMachineInstructionElimID);
1246  addPass(createSIShrinkInstructionsPass());
1247 }
1248 
1249 bool GCNPassConfig::addILPOpts() {
1251  addPass(&EarlyIfConverterID);
1252 
1254  return false;
1255 }
1256 
1257 bool GCNPassConfig::addInstSelector() {
1258  AMDGPUPassConfig::addInstSelector();
1259  addPass(&SIFixSGPRCopiesID);
1260  addPass(createSILowerI1CopiesPass());
1261  return false;
1262 }
1263 
1264 bool GCNPassConfig::addIRTranslator() {
1265  addPass(new IRTranslator(getOptLevel()));
1266  return false;
1267 }
1268 
1269 void GCNPassConfig::addPreLegalizeMachineIR() {
1270  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1271  addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
1272  addPass(new Localizer());
1273 }
1274 
1275 bool GCNPassConfig::addLegalizeMachineIR() {
1276  addPass(new Legalizer());
1277  return false;
1278 }
1279 
1280 void GCNPassConfig::addPreRegBankSelect() {
1281  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1282  addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
1283 }
1284 
1285 bool GCNPassConfig::addRegBankSelect() {
1286  addPass(new RegBankSelect());
1287  return false;
1288 }
1289 
1290 void GCNPassConfig::addPreGlobalInstructionSelect() {
1291  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1292  addPass(createAMDGPURegBankCombiner(IsOptNone));
1293 }
1294 
1295 bool GCNPassConfig::addGlobalInstructionSelect() {
1296  addPass(new InstructionSelect(getOptLevel()));
1297  return false;
1298 }
1299 
1300 void GCNPassConfig::addPreRegAlloc() {
1301  if (LateCFGStructurize) {
1303  }
1304 }
1305 
1306 void GCNPassConfig::addFastRegAlloc() {
1307  // FIXME: We have to disable the verifier here because of PHIElimination +
1308  // TwoAddressInstructions disabling it.
1309 
1310  // This must be run immediately after phi elimination and before
1311  // TwoAddressInstructions, otherwise the processing of the tied operand of
1312  // SI_ELSE will introduce a copy of the tied operand source after the else.
1313  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
1314 
1317 
1319 }
1320 
1321 void GCNPassConfig::addOptimizedRegAlloc() {
1322  // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
1323  // instructions that cause scheduling barriers.
1324  insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
1326 
1327  if (OptExecMaskPreRA)
1329 
1330  if (isPassEnabled(EnablePreRAOptimizations))
1332 
1333  // This is not an essential optimization and it has a noticeable impact on
1334  // compilation time, so we only enable it from O2.
1335  if (TM->getOptLevel() > CodeGenOpt::Less)
1337 
1338  // FIXME: when an instruction has a Killed operand, and the instruction is
1339  // inside a bundle, seems only the BUNDLE instruction appears as the Kills of
1340  // the register in LiveVariables, this would trigger a failure in verifier,
1341  // we should fix it and enable the verifier.
1342  if (OptVGPRLiveRange)
1343  insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeID, false);
1344  // This must be run immediately after phi elimination and before
1345  // TwoAddressInstructions, otherwise the processing of the tied operand of
1346  // SI_ELSE will introduce a copy of the tied operand source after the else.
1347  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
1348 
1349  if (EnableDCEInRA)
1351 
1353 }
1354 
1355 bool GCNPassConfig::addPreRewrite() {
1356  if (EnableRegReassign)
1357  addPass(&GCNNSAReassignID);
1358  return true;
1359 }
1360 
1361 FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
1362  // Initialize the global default.
1363  llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
1364  initializeDefaultSGPRRegisterAllocatorOnce);
1365 
1366  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
1367  if (Ctor != useDefaultRegisterAllocator)
1368  return Ctor();
1369 
1370  if (Optimized)
1371  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
1372 
1373  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
1374 }
1375 
1376 FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1377  // Initialize the global default.
1378  llvm::call_once(InitializeDefaultVGPRRegisterAllocatorFlag,
1379  initializeDefaultVGPRRegisterAllocatorOnce);
1380 
1381  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
1382  if (Ctor != useDefaultRegisterAllocator)
1383  return Ctor();
1384 
1385  if (Optimized)
1386  return createGreedyVGPRRegisterAllocator();
1387 
1388  return createFastVGPRRegisterAllocator();
1389 }
1390 
// The common single-pass register allocator hook is unsupported: this target
// always allocates in two phases via createSGPRAllocPass / createVGPRAllocPass
// (see addRegAssignAndRewriteFast/Optimized), so this must never be reached.
FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
  llvm_unreachable("should not be used");
}
1394 
// Diagnostic text for a user-supplied -regalloc option, which cannot drive the
// split SGPR/VGPR allocation scheme used by this target.
static const char RegAllocOptNotSupportedMessage[] =
    "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
1397 
1398 bool GCNPassConfig::addRegAssignAndRewriteFast() {
1399  if (!usingDefaultRegAlloc())
1401 
1402  addPass(createSGPRAllocPass(false));
1403 
1404  // Equivalent of PEI for SGPRs.
1405  addPass(&SILowerSGPRSpillsID);
1406 
1407  addPass(createVGPRAllocPass(false));
1408  return true;
1409 }
1410 
1411 bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1412  if (!usingDefaultRegAlloc())
1414 
1415  addPass(createSGPRAllocPass(true));
1416 
1417  // Commit allocated register changes. This is mostly necessary because too
1418  // many things rely on the use lists of the physical registers, such as the
1419  // verifier. This is only necessary with allocators which use LiveIntervals,
1420  // since FastRegAlloc does the replacments itself.
1421  addPass(createVirtRegRewriter(false));
1422 
1423  // Equivalent of PEI for SGPRs.
1424  addPass(&SILowerSGPRSpillsID);
1425 
1426  addPass(createVGPRAllocPass(true));
1427 
1428  addPreRewrite();
1429  addPass(&VirtRegRewriterID);
1430 
1431  return true;
1432 }
1433 
1434 void GCNPassConfig::addPostRegAlloc() {
1435  addPass(&SIFixVGPRCopiesID);
1436  if (getOptLevel() > CodeGenOpt::None)
1437  addPass(&SIOptimizeExecMaskingID);
1439 }
1440 
// Runs just before the second (post-RA) scheduling pass.
void GCNPassConfig::addPreSched2() {
  // Bundle instructions after register allocation (SIPostRABundler).
  addPass(&SIPostRABundlerID);
}
1444 
// Final SI-specific lowering and cleanup passes that must run immediately
// before code emission. The ordering here is deliberate; do not reorder
// without checking the pass dependencies.
void GCNPassConfig::addPreEmitPass() {
  // Memory-model legalization, then insertion of the required s_waitcnt
  // instructions.
  addPass(createSIMemoryLegalizerPass());
  addPass(createSIInsertWaitcntsPass());

  if (TM->getOptLevel() > CodeGenOpt::None)
    addPass(createSIShrinkInstructionsPass());

  addPass(createSIModeRegisterPass());

  // NOTE(review): the guards below use getOptLevel() while the one above uses
  // TM->getOptLevel(); they appear equivalent here — confirm before unifying.
  if (getOptLevel() > CodeGenOpt::None)
    addPass(&SIInsertHardClausesID);

  addPass(&SILateBranchLoweringPassID);
  if (getOptLevel() > CodeGenOpt::None)
    addPass(&SIPreEmitPeepholeID);
  // The hazard recognizer that runs as part of the post-ra scheduler does not
  // guarantee to be able to handle all hazards correctly. This is because if
  // there are multiple scheduling regions in a basic block, the regions are
  // scheduled bottom up, so when we begin to schedule a region we don't know
  // what instructions were emitted directly before it.
  //
  // Here we add a stand-alone hazard recognizer pass which can handle all
  // cases.
  addPass(&PostRAHazardRecognizerID);
  addPass(&BranchRelaxationPassID);
}
1471 
1473  return new GCNPassConfig(*this, PM);
1474 }
1475 
1477  return new yaml::SIMachineFunctionInfo();
1478 }
1479 
1483  return new yaml::SIMachineFunctionInfo(
1484  *MFI, *MF.getSubtarget().getRegisterInfo(), MF);
1485 }
1486 
1489  SMDiagnostic &Error, SMRange &SourceRange) const {
1490  const yaml::SIMachineFunctionInfo &YamlMFI =
1491  reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
1492  MachineFunction &MF = PFS.MF;
1494 
1495  if (MFI->initializeBaseYamlFields(YamlMFI, MF, PFS, Error, SourceRange))
1496  return true;
1497 
1498  if (MFI->Occupancy == 0) {
1499  // Fixup the subtarget dependent default value.
1500  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1501  MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
1502  }
1503 
1504  auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
1505  Register TempReg;
1506  if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
1507  SourceRange = RegName.SourceRange;
1508  return true;
1509  }
1510  RegVal = TempReg;
1511 
1512  return false;
1513  };
1514 
1515  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
1516  // Create a diagnostic for a the register string literal.
1517  const MemoryBuffer &Buffer =
1518  *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
1519  Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1520  RegName.Value.size(), SourceMgr::DK_Error,
1521  "incorrect register class for field", RegName.Value,
1522  None, None);
1523  SourceRange = RegName.SourceRange;
1524  return true;
1525  };
1526 
1527  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1528  parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1529  parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1530  return true;
1531 
1532  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1533  !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
1534  return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1535  }
1536 
1537  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1538  !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1539  return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1540  }
1541 
1542  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1543  !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1544  return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1545  }
1546 
1547  auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
1548  const TargetRegisterClass &RC,
1549  ArgDescriptor &Arg, unsigned UserSGPRs,
1550  unsigned SystemSGPRs) {
1551  // Skip parsing if it's not present.
1552  if (!A)
1553  return false;
1554 
1555  if (A->IsRegister) {
1556  Register Reg;
1557  if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
1558  SourceRange = A->RegisterName.SourceRange;
1559  return true;
1560  }
1561  if (!RC.contains(Reg))
1562  return diagnoseRegisterClass(A->RegisterName);
1564  } else
1565  Arg = ArgDescriptor::createStack(A->StackOffset);
1566  // Check and apply the optional mask.
1567  if (A->Mask)
1568  Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
1569 
1570  MFI->NumUserSGPRs += UserSGPRs;
1571  MFI->NumSystemSGPRs += SystemSGPRs;
1572  return false;
1573  };
1574 
1575  if (YamlMFI.ArgInfo &&
1576  (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
1577  AMDGPU::SGPR_128RegClass,
1578  MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
1579  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
1580  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
1581  2, 0) ||
1582  parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1583  MFI->ArgInfo.QueuePtr, 2, 0) ||
1584  parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
1585  AMDGPU::SReg_64RegClass,
1586  MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
1587  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
1588  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
1589  2, 0) ||
1590  parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
1591  AMDGPU::SReg_64RegClass,
1592  MFI->ArgInfo.FlatScratchInit, 2, 0) ||
1593  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
1594  AMDGPU::SGPR_32RegClass,
1595  MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
1596  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
1597  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
1598  0, 1) ||
1599  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
1600  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
1601  0, 1) ||
1602  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
1603  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
1604  0, 1) ||
1605  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
1606  AMDGPU::SGPR_32RegClass,
1607  MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
1608  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
1609  AMDGPU::SGPR_32RegClass,
1610  MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
1611  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
1612  AMDGPU::SReg_64RegClass,
1613  MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
1614  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
1615  AMDGPU::SReg_64RegClass,
1616  MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
1617  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
1618  AMDGPU::VGPR_32RegClass,
1619  MFI->ArgInfo.WorkItemIDX, 0, 0) ||
1620  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
1621  AMDGPU::VGPR_32RegClass,
1622  MFI->ArgInfo.WorkItemIDY, 0, 0) ||
1623  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
1624  AMDGPU::VGPR_32RegClass,
1625  MFI->ArgInfo.WorkItemIDZ, 0, 0)))
1626  return true;
1627 
1628  MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
1629  MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
1634 
1635  return false;
1636 }
llvm::AAResults::addAAResult
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
Definition: AliasAnalysis.h:465
llvm::initializeR600ControlFlowFinalizerPass
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
llvm::TargetPassConfig::addPostRegAlloc
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
Definition: TargetPassConfig.h:419
llvm::createR600ExpandSpecialInstrsPass
FunctionPass * createR600ExpandSpecialInstrsPass()
Definition: R600ExpandSpecialInstrs.cpp:57
EnableDCEInRA
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
llvm::TargetMachine::getOptLevel
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:198
llvm::createFastRegisterAllocator
FunctionPass * createFastRegisterAllocator()
FastRegisterAllocation Pass - This pass register allocates as fast as possible.
Definition: RegAllocFast.cpp:1567
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1233
llvm::AMDGPUAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: AMDGPUAliasAnalysis.h:50
llvm::ArgDescriptor::createStack
static constexpr ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
Definition: AMDGPUArgumentUsageInfo.h:49
llvm::AMDGPUFunctionArgInfo::QueuePtr
ArgDescriptor QueuePtr
Definition: AMDGPUArgumentUsageInfo.h:126
llvm::AMDGPUTargetMachine::EnableFixedFunctionABI
static bool EnableFixedFunctionABI
Definition: AMDGPUTargetMachine.h:37
llvm::createCGSCCToFunctionPassAdaptor
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:494
EnableLowerModuleLDS
static cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
llvm::initializeR600PacketizerPass
void initializeR600PacketizerPass(PassRegistry &)
LLVMInitializeAMDGPUTarget
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget()
Definition: AMDGPUTargetMachine.cpp:327
RegAllocOptNotSupportedMessage
static const char RegAllocOptNotSupportedMessage[]
Definition: AMDGPUTargetMachine.cpp:1395
llvm::InferAddressSpacesPass
Definition: InferAddressSpaces.h:16
EnableSIModeRegisterPass
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
llvm::PerFunctionMIParsingState::SM
SourceMgr * SM
Definition: MIParser.h:165
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
PassBuilder.h
llvm::createGreedyRegisterAllocator
FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized buil...
Definition: RegAllocGreedy.cpp:635
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::Attribute::isValid
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:167
llvm::AMDGPUTargetMachine::registerDefaultAliasAnalyses
void registerDefaultAliasAnalyses(AAManager &) override
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Definition: AMDGPUTargetMachine.cpp:616
mustPreserveGV
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
Definition: AMDGPUTargetMachine.cpp:543
llvm::createSeparateConstOffsetFromGEPPass
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Definition: SeparateConstOffsetFromGEP.cpp:499
llvm::StringRef::empty
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
llvm::GCNTargetMachine::convertFuncInfoToYAML
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
Definition: AMDGPUTargetMachine.cpp:1481
llvm::AMDGPULowerModuleLDSPass
Definition: AMDGPU.h:165
llvm::initializeR600ExpandSpecialInstrsPassPass
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
llvm::initializeAMDGPUPostLegalizerCombinerPass
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
llvm::initializeAMDGPUPromoteAllocaPass
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
llvm::createSIMemoryLegalizerPass
FunctionPass * createSIMemoryLegalizerPass()
Definition: SIMemoryLegalizer.cpp:1865
llvm::SILowerSGPRSpillsID
char & SILowerSGPRSpillsID
Definition: SILowerSGPRSpills.cpp:73
llvm::Wave32
@ Wave32
Definition: AMDGPUMCTargetDesc.h:34
llvm::PassBuilder::registerPipelineStartEPCallback
void registerPipelineStartEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:492
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:228
llvm::TargetOptions
Definition: TargetOptions.h:113
llvm::AMDGPUAlwaysInlinePass
Definition: AMDGPU.h:273
llvm::yaml::SIMachineFunctionInfo::ArgInfo
Optional< SIArgumentInfo > ArgInfo
Definition: SIMachineFunctionInfo.h:290
SIMachineFunctionInfo.h
Scalar.h
llvm::ArgDescriptor::createArg
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
Definition: AMDGPUArgumentUsageInfo.h:54
createMinRegScheduler
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:429
llvm::initializeGCNPreRAOptimizationsPass
void initializeGCNPreRAOptimizationsPass(PassRegistry &)
llvm::ArgDescriptor
Definition: AMDGPUArgumentUsageInfo.h:23
llvm::Function
Definition: Function.h:61
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:459
llvm::Attribute
Definition: Attributes.h:52
llvm::AMDGPU::SIModeRegisterDefaults::FP32OutputDenormals
bool FP32OutputDenormals
Definition: AMDGPUBaseInfo.h:921
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::initializeAMDGPUAlwaysInlinePass
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
llvm::yaml::MachineFunctionInfo
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
Definition: MIRYamlMapping.h:673
llvm::PHIEliminationID
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions.
Definition: PHIElimination.cpp:129
llvm::initializeSIInsertHardClausesPass
void initializeSIInsertHardClausesPass(PassRegistry &)
llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
llvm::initializeSIPreAllocateWWMRegsPass
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::initializeAMDGPUPropagateAttributesLatePass
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
InferAddressSpaces.h
llvm::AMDGPU::SIModeRegisterDefaults::IEEE
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
Definition: AMDGPUBaseInfo.h:912
llvm::createAlwaysInlinerLegacyPass
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
Definition: AlwaysInliner.cpp:169
getGPUOrDefault
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
Definition: AMDGPUTargetMachine.cpp:487
R600MachineScheduler.h
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:137
llvm::AMDGPUPromoteAllocaToVectorPass
Definition: AMDGPU.h:258
llvm::initializeAMDGPULateCodeGenPreparePass
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &)
llvm::createFixIrreduciblePass
FunctionPass * createFixIrreduciblePass()
Definition: FixIrreducible.cpp:103
llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition: MachineScheduler.h:136
llvm::createVirtRegRewriter
FunctionPass * createVirtRegRewriter(bool ClearVirtRegs=true)
Definition: VirtRegMap.cpp:653
llvm::Triple::amdgcn
@ amdgcn
Definition: Triple.h:72
GCNSchedStrategy.h
llvm::GCNIterativeScheduler::SCHEDULE_ILP
@ SCHEDULE_ILP
Definition: GCNIterativeScheduler.h:37
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:733
llvm::createAMDGPULateCodeGenPreparePass
FunctionPass * createAMDGPULateCodeGenPreparePass()
Definition: AMDGPULateCodeGenPrepare.cpp:195
llvm::createSILowerI1CopiesPass
FunctionPass * createSILowerI1CopiesPass()
Definition: SILowerI1Copies.cpp:413
llvm::initializeR600ClauseMergePassPass
void initializeR600ClauseMergePassPass(PassRegistry &)
llvm::GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY
@ SCHEDULE_LEGACYMAXOCCUPANCY
Definition: GCNIterativeScheduler.h:36
llvm::createFlattenCFGPass
FunctionPass * createFlattenCFGPass()
Definition: FlattenCFGPass.cpp:52
llvm::InternalizePass
A pass that internalizes all functions and variables other than those that must be preserved accordin...
Definition: Internalize.h:36
llvm::initializeSIOptimizeExecMaskingPreRAPass
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
llvm::AMDGPUFunctionArgInfo::FlatScratchInit
ArgDescriptor FlatScratchInit
Definition: AMDGPUArgumentUsageInfo.h:129
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::createEarlyCSEPass
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1719
llvm::Wave64
@ Wave64
Definition: AMDGPUMCTargetDesc.h:34
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
llvm::initializeSILowerI1CopiesPass
void initializeSILowerI1CopiesPass(PassRegistry &)
llvm::SIPreEmitPeepholeID
char & SIPreEmitPeepholeID
llvm::createAMDGPUPostLegalizeCombiner
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPostLegalizerCombiner.cpp:400
llvm::initializeAMDGPUDAGToDAGISelPass
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
llvm::initializeSIPeepholeSDWAPass
void initializeSIPeepholeSDWAPass(PassRegistry &)
llvm::ShadowStackGCLoweringID
char & ShadowStackGCLoweringID
ShadowStackGCLowering - Implements the custom lowering mechanism used by the shadow stack GC.
Definition: ShadowStackGCLowering.cpp:92
llvm::SILowerControlFlowID
char & SILowerControlFlowID
Definition: SILowerControlFlow.cpp:165
llvm::yaml::SIMachineFunctionInfo
Definition: SIMachineFunctionInfo.h:270
llvm::AMDGPUMachineFunction::getLDSSize
unsigned getLDSSize() const
Definition: AMDGPUMachineFunction.h:70
llvm::SIOptimizeVGPRLiveRangeID
char & SIOptimizeVGPRLiveRangeID
Definition: SIOptimizeVGPRLiveRange.cpp:572
llvm::createAMDGPUUnifyMetadataPass
ModulePass * createAMDGPUUnifyMetadataPass()
InstructionSelect.h
EnableStructurizerWorkarounds
static cl::opt< bool > EnableStructurizerWorkarounds("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden)
llvm::AMDGPUAAWrapperPass
Legacy wrapper pass to provide the AMDGPUAAResult object.
Definition: AMDGPUAliasAnalysis.h:64
EnableAtomicOptimizations
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
createGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:412
llvm::Optional< Reloc::Model >
llvm::GCNScheduleDAGMILive
Definition: GCNSchedStrategy.h:73
llvm::initializeSIFoldOperandsPass
void initializeSIFoldOperandsPass(PassRegistry &)
llvm::createBarrierNoopPass
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
Definition: BarrierNoopPass.cpp:43
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition: AMDGPUISelDAGToDAG.cpp:384
InternalizeSymbols
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
llvm::initializeGlobalISel
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
Definition: GlobalISel.cpp:18
llvm::AMDGPU::SIModeRegisterDefaults::FP32InputDenormals
bool FP32InputDenormals
If this is set, neither input or output denormals are flushed for most f32 instructions.
Definition: AMDGPUBaseInfo.h:920
llvm::PassBuilder::registerAnalysisRegistrationCallback
void registerAnalysisRegistrationCallback(const std::function< void(CGSCCAnalysisManager &)> &C)
Register callbacks for analysis registration with this PassBuilder instance.
Definition: PassBuilder.h:525
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
SIMachineScheduler.h
llvm::yaml::SIMode::FP32OutputDenormals
bool FP32OutputDenormals
Definition: SIMachineFunctionInfo.h:234
llvm::createGVNPass
FunctionPass * createGVNPass(bool NoMemDepAnalysis=false)
Create a legacy GVN pass.
Definition: GVN.cpp:3097
llvm::AMDGPUFunctionArgInfo::PrivateSegmentSize
ArgDescriptor PrivateSegmentSize
Definition: AMDGPUArgumentUsageInfo.h:130
llvm::createR600OpenCLImageTypeLoweringPass
ModulePass * createR600OpenCLImageTypeLoweringPass()
Definition: R600OpenCLImageTypeLoweringPass.cpp:372
llvm::createR600ClauseMergePass
FunctionPass * createR600ClauseMergePass()
Definition: R600ClauseMergePass.cpp:209
llvm::AMDGPUUseNativeCallsPass
Definition: AMDGPU.h:88
llvm::AMDGPUFunctionArgInfo::DispatchPtr
ArgDescriptor DispatchPtr
Definition: AMDGPUArgumentUsageInfo.h:125
llvm::initializeAMDGPUPropagateAttributesEarlyPass
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
llvm::SIPreAllocateWWMRegsID
char & SIPreAllocateWWMRegsID
Definition: SIPreAllocateWWMRegs.cpp:81
llvm::SIPostRABundlerID
char & SIPostRABundlerID
Definition: SIPostRABundler.cpp:69
llvm::OptimizationLevel::O0
static const OptimizationLevel O0
Disable as many optimizations as possible.
Definition: OptimizationLevel.h:41
llvm::initializeSIShrinkInstructionsPass
void initializeSIShrinkInstructionsPass(PassRegistry &)
LegacyPassManager.h
llvm::TwoAddressInstructionPassID
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
Definition: TwoAddressInstructionPass.cpp:192
PassManagerBuilder.h
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:144
llvm::GCNTargetMachine::parseMachineFunctionInfo
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target's MachineFunctionInfo from the YAML representation.
Definition: AMDGPUTargetMachine.cpp:1487
llvm::initializeAMDGPUSimplifyLibCallsPass
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
Internalize.h
createSIMachineScheduler
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:407
llvm::MemoryBuffer
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:50
llvm::AMDGPUMachineFunction::Mode
AMDGPU::SIModeRegisterDefaults Mode
Definition: AMDGPUMachineFunction.h:44
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::createAMDGPUExternalAAWrapperPass
ImmutablePass * createAMDGPUExternalAAWrapperPass()
Definition: AMDGPUAliasAnalysis.cpp:36
llvm::AMDGPUFunctionArgInfo::DispatchID
ArgDescriptor DispatchID
Definition: AMDGPUArgumentUsageInfo.h:128
llvm::initializeAMDGPULowerIntrinsicsPass
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
llvm::initializeGCNDPPCombinePass
void initializeGCNDPPCombinePass(PassRegistry &)
llvm::AMDGPUUnifyMetadataPass
Definition: AMDGPU.h:300
llvm::AMDGPUFunctionArgInfo::ImplicitArgPtr
ArgDescriptor ImplicitArgPtr
Definition: AMDGPUArgumentUsageInfo.h:141
EnableSDWAPeephole
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
FunctionPassCtor
llvm::SIOptimizeExecMaskingID
char & SIOptimizeExecMaskingID
Definition: SIOptimizeExecMasking.cpp:52
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::initializeAMDGPUUnifyMetadataPass
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
llvm::yaml::SIMachineFunctionInfo::FrameOffsetReg
StringValue FrameOffsetReg
Definition: SIMachineFunctionInfo.h:287
llvm::initializeAMDGPUArgumentUsageInfoPass
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
SISchedRegistry
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
GCNIterativeScheduler.h
llvm::AMDGPUFunctionArgInfo::WorkGroupIDX
ArgDescriptor WorkGroupIDX
Definition: AMDGPUArgumentUsageInfo.h:133
llvm::GCNTargetMachine::GCNTargetMachine
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
Definition: AMDGPUTargetMachine.cpp:828
llvm::createInferAddressSpacesPass
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
Definition: InferAddressSpaces.cpp:1208
llvm::initializeSILateBranchLoweringPass
void initializeSILateBranchLoweringPass(PassRegistry &)
AMDGPUAliasAnalysis.h
llvm::AMDGPUTargetMachine
Definition: AMDGPUTargetMachine.h:27
llvm::createAMDGPUUseNativeCallsPass
FunctionPass * createAMDGPUUseNativeCallsPass()
Definition: AMDGPULibCalls.cpp:1703
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
llvm::createR600Packetizer
FunctionPass * createR600Packetizer()
Definition: R600Packetizer.cpp:411
AlwaysInliner.h
llvm::R600TargetMachine::getTargetTransformInfo
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
Definition: AMDGPUTargetMachine.cpp:820
llvm::AAResults
Definition: AliasAnalysis.h:456
llvm::yaml::SIMode::FP32InputDenormals
bool FP32InputDenormals
Definition: SIMachineFunctionInfo.h:233
llvm::PassBuilder::registerParseAACallback
void registerParseAACallback(const std::function< bool(StringRef Name, AAManager &AA)> &C)
Register a callback for parsing an AliasAnalysis Name to populate the given AAManager AA.
Definition: PassBuilder.h:517
ScalarizeGlobal
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
llvm::createNaryReassociatePass
FunctionPass * createNaryReassociatePass()
Definition: NaryReassociate.cpp:165
llvm::PostRAHazardRecognizerID
char & PostRAHazardRecognizerID
PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Definition: PostRAHazardRecognizer.cpp:64
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:720
llvm::initializeAMDGPULowerKernelArgumentsPass
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
llvm::initializeSIWholeQuadModePass
void initializeSIWholeQuadModePass(PassRegistry &)
llvm::initializeAMDGPUAtomicOptimizerPass
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
llvm::getTheAMDGPUTarget
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
Definition: AMDGPUTargetInfo.cpp:20
llvm::Legalizer
Definition: Legalizer.h:31
llvm::AMDGPUFunctionArgInfo::WorkItemIDX
ArgDescriptor WorkItemIDX
Definition: AMDGPUArgumentUsageInfo.h:148
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
EnableAMDGPUAliasAnalysis
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
EnableLowerKernelArguments
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
EnableLoadStoreVectorizer
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
AMDGPUTargetInfo.h
llvm::createAMDGPULowerModuleLDSPass
ModulePass * createAMDGPULowerModuleLDSPass()
llvm::FuncletLayoutID
char & FuncletLayoutID
This pass lays out funclets contiguously.
Definition: FuncletLayout.cpp:39
AMDGPUMacroFusion.h
llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
llvm::createSIInsertWaitcntsPass
FunctionPass * createSIInsertWaitcntsPass()
Definition: SIInsertWaitcnts.cpp:802
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
EnableLDSReplaceWithPointer
static cl::opt< bool > EnableLDSReplaceWithPointer("amdgpu-enable-lds-replace-with-pointer", cl::desc("Enable LDS replace with pointer pass"), cl::init(false), cl::Hidden)
llvm::PassBuilder
This class provides access to building LLVM's passes.
Definition: PassBuilder.h:134
EnableRegReassign
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::SMDiagnostic
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:251
llvm::yaml::SIMode::FP64FP16InputDenormals
bool FP64FP16InputDenormals
Definition: SIMachineFunctionInfo.h:235
llvm::createAMDGPUAnnotateUniformValues
FunctionPass * createAMDGPUAnnotateUniformValues()
Definition: AMDGPUAnnotateUniformValues.cpp:150
llvm::createR600EmitClauseMarkers
FunctionPass * createR600EmitClauseMarkers()
Definition: R600EmitClauseMarkers.cpp:336
llvm::initializeAMDGPUUnifyDivergentExitNodesPass
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
llvm::EarlyIfConverterID
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.
Definition: EarlyIfConversion.cpp:784
useDefaultRegisterAllocator
static FunctionPass * useDefaultRegisterAllocator()
-regalloc=... command line option.
Definition: TargetPassConfig.cpp:1069
llvm::AMDGPUPromoteAllocaPass
Definition: AMDGPU.h:250
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::createModuleToFunctionPassAdaptor
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1209
llvm::createAtomicExpandPass
FunctionPass * createAtomicExpandPass()
llvm::InstructionSelect
This pass is responsible for selecting generic machine instructions to target-specific instructions.
Definition: InstructionSelect.h:31
llvm::AMDGPUTargetMachine::getNullPointerValue
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
Definition: AMDGPUTargetMachine.cpp:786
llvm::RegisterTargetMachine
RegisterTargetMachine - Helper template for registering a target machine implementation,...
Definition: TargetRegistry.h:1275
llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:318
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::Triple::r600
@ r600
Definition: Triple.h:71
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::createUnifyLoopExitsPass
FunctionPass * createUnifyLoopExitsPass()
Definition: UnifyLoopExits.cpp:53
llvm::GCNIterativeScheduler
Definition: GCNIterativeScheduler.h:29
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:383
createTLOF
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:399
llvm::SourceMgr::getMainFileID
unsigned getMainFileID() const
Definition: SourceMgr.h:129
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
AMDGPUTargetObjectFile.h
llvm::AMDGPULowerKernelAttributesPass
Definition: AMDGPU.h:125
GVN.h
llvm::createAMDGPUPropagateAttributesLatePass
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:410
llvm::initializeSIMemoryLegalizerPass
void initializeSIMemoryLegalizerPass(PassRegistry &)
llvm::createLoadStoreVectorizerPass
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
llvm::initializeAMDGPUResourceUsageAnalysisPass
void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &)
EnableDPPCombine
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
llvm::createAMDGPULowerIntrinsicsPass
ModulePass * createAMDGPULowerIntrinsicsPass()
Definition: AMDGPULowerIntrinsics.cpp:180
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:29
llvm::StackMapLivenessID
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
Definition: StackMapLivenessAnalysis.cpp:86
llvm::createAMDGPUAnnotateKernelFeaturesPass
Pass * createAMDGPUAnnotateKernelFeaturesPass()
Definition: AMDGPUAnnotateKernelFeatures.cpp:426
llvm::initializeAMDGPUReplaceLDSUseWithPointerPass
void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &)
llvm::AMDGPUTargetMachine::~AMDGPUTargetMachine
~AMDGPUTargetMachine() override
llvm::AMDGPUTargetMachine::getSubtargetImpl
const TargetSubtargetInfo * getSubtargetImpl() const
llvm::createSinkingPass
FunctionPass * createSinkingPass()
Definition: Sink.cpp:284
llvm::createSpeculativeExecutionPass
FunctionPass * createSpeculativeExecutionPass()
Definition: SpeculativeExecution.cpp:325
Utils.h
llvm::SILoadStoreOptimizerID
char & SILoadStoreOptimizerID
Definition: SILoadStoreOptimizer.cpp:576
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:301
llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition: MachinePassRegistry.h:135
llvm::None
const NoneType None
Definition: None.h:23
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:345
llvm::createAMDGPUExportClusteringDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()
Definition: AMDGPUExportClustering.cpp:144
llvm::initializeSIOptimizeVGPRLiveRangePass
void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &)
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::TargetMachine::resetTargetOptions
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
Definition: TargetMachine.cpp:56
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1381
llvm::SmallString< 128 >
llvm::SourceMgr::getMemoryBuffer
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:122
llvm::createFunctionInliningPass
Pass * createFunctionInliningPass()
createFunctionInliningPass - Return a new pass object that uses a heuristic to inline direct function...
Definition: InlineSimple.cpp:97
llvm::legacy::PassManagerBase::add
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
llvm::R600TTIImpl
Definition: AMDGPUTargetTransformInfo.h:225
llvm::MemoryBuffer::getBufferIdentifier
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:75
llvm::createAMDGPUAAWrapperPass
ImmutablePass * createAMDGPUAAWrapperPass()
Definition: AMDGPUAliasAnalysis.cpp:32
llvm::PassManagerBuilder
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
Definition: PassManagerBuilder.h:59
llvm::createLowerSwitchPass
FunctionPass * createLowerSwitchPass()
Definition: LowerSwitch.cpp:582
llvm::createAMDGPUPrintfRuntimeBinding
ModulePass * createAMDGPUPrintfRuntimeBinding()
Definition: AMDGPUPrintfRuntimeBinding.cpp:92
AMDGPUTargetTransformInfo.h
PB
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
Passes.h
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:190
llvm::VirtRegRewriterID
char & VirtRegRewriterID
VirtRegRewriter pass.
Definition: VirtRegMap.cpp:227
llvm::createAMDGPUAlwaysInlinePass
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Definition: AMDGPUAlwaysInlinePass.cpp:158
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:67
llvm::initializeSILowerSGPRSpillsPass
void initializeSILowerSGPRSpillsPass(PassRegistry &)
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:622
llvm::PassBuilder::registerPipelineEarlySimplificationEPCallback
void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:501
llvm::AMDGPUTargetMachine::getFeatureString
StringRef getFeatureString(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:535
R600SchedRegistry
static MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler)
OptVGPRLiveRange
static cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
llvm::cl::opt
Definition: CommandLine.h:1422
llvm::createLCSSAPass
Pass * createLCSSAPass()
Definition: LCSSA.cpp:484
EnableR600StructurizeCFG
static cl::opt< bool > EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
llvm::TargetMachine::TargetTriple
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with.
Definition: TargetMachine.h:96
llvm::AMDGPUAS::UNKNOWN_ADDRESS_SPACE
@ UNKNOWN_ADDRESS_SPACE
Definition: AMDGPU.h:422
OptExecMaskPreRA
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
llvm::GCLoweringID
char & GCLoweringID
GCLowering Pass - Used by gc.root to perform its default lowering operations.
Definition: GCRootLowering.cpp:88
llvm::R600Subtarget
Definition: R600Subtarget.h:36
llvm::yaml::SIMachineFunctionInfo::ScratchRSrcReg
StringValue ScratchRSrcReg
Definition: SIMachineFunctionInfo.h:286
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::CodeGenOpt::Default
@ Default
Definition: CodeGen.h:55
llvm::AMDGPUUnifyDivergentExitNodesID
char & AMDGPUUnifyDivergentExitNodesID
Definition: AMDGPUUnifyDivergentExitNodes.cpp:79
llvm::R600TargetMachine::createPassConfig
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
Definition: AMDGPUTargetMachine.cpp:1174
llvm::initializeSIInsertWaitcntsPass
void initializeSIInsertWaitcntsPass(PassRegistry &)
llvm::TargetMachine::setRequiresStructuredCFG
void setRequiresStructuredCFG(bool Value)
Definition: TargetMachine.h:214
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::initializeSIAnnotateControlFlowPass
void initializeSIAnnotateControlFlowPass(PassRegistry &)
llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3489
llvm::AMDGPUFunctionArgInfo::WorkGroupIDZ
ArgDescriptor WorkGroupIDZ
Definition: AMDGPUArgumentUsageInfo.h:135
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:387
llvm::RegisterRegAllocBase< RegisterRegAlloc >::FunctionPassCtor
FunctionPass *(*)() FunctionPassCtor
Definition: RegAllocRegistry.h:32
llvm::EngineKind::JIT
@ JIT
Definition: ExecutionEngine.h:525
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
llvm::DetectDeadLanesID
char & DetectDeadLanesID
This pass adds dead/undef flags after analyzing subregister lanes.
Definition: DetectDeadLanes.cpp:128
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition: TargetMachine.h:206
llvm::AMDGPUFunctionArgInfo::PrivateSegmentBuffer
ArgDescriptor PrivateSegmentBuffer
Definition: AMDGPUArgumentUsageInfo.h:124
llvm::createAMDGPUAtomicOptimizerPass
FunctionPass * createAMDGPUAtomicOptimizerPass()
Definition: AMDGPUAtomicOptimizer.cpp:707
llvm::initializeR600VectorRegMergerPass
void initializeR600VectorRegMergerPass(PassRegistry &)
IPO.h
llvm::SIPeepholeSDWAID
char & SIPeepholeSDWAID
Definition: SIPeepholeSDWA.cpp:191
llvm::SIMachineFunctionInfo::initializeBaseYamlFields
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
Definition: SIMachineFunctionInfo.cpp:600
llvm::createGlobalDCEPass
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
llvm::FinalizeMachineBundlesID
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalize machine instruction bundles (created earlier,...
Definition: MachineInstrBundle.cpp:98
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::GCNTTIImpl
Definition: AMDGPUTargetTransformInfo.h:62
llvm::SIFixVGPRCopiesID
char & SIFixVGPRCopiesID
Definition: SIFixVGPRCopies.cpp:45
llvm::initializeAMDGPURewriteOutArgumentsPass
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
CGSCCPassManager.h
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition: MachineScheduler.h:120
llvm::GCNIterativeScheduler::SCHEDULE_MINREGFORCED
@ SCHEDULE_MINREGFORCED
Definition: GCNIterativeScheduler.h:35
createR600MachineScheduler
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:403
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::AMDGPUSimplifyLibCallsPass
Definition: AMDGPU.h:80
llvm::TargetPassConfig::addIRPasses
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition: TargetPassConfig.cpp:810
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::TargetPassConfig::addOptimizedRegAlloc
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
Definition: TargetPassConfig.cpp:1381
llvm::AMDGPUFunctionArgInfo::PrivateSegmentWaveByteOffset
ArgDescriptor PrivateSegmentWaveByteOffset
Definition: AMDGPUArgumentUsageInfo.h:137
llvm::SIFormMemoryClausesID
char & SIFormMemoryClausesID
Definition: SIFormMemoryClauses.cpp:91
llvm::LiveVariablesID
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is life and sets mac...
Definition: LiveVariables.cpp:45
LateCFGStructurize
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
TargetPassConfig.h
llvm::createExternalAAWrapperPass
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
llvm::SIFixSGPRCopiesID
char & SIFixSGPRCopiesID
Definition: SIFixSGPRCopies.cpp:121
llvm::AMDGPUFunctionArgInfo::WorkGroupIDY
ArgDescriptor WorkGroupIDY
Definition: AMDGPUArgumentUsageInfo.h:134
Localizer.h
llvm::MachineCSEID
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:153
llvm::GCNDPPCombineID
char & GCNDPPCombineID
Definition: GCNDPPCombine.cpp:111
llvm::TargetPassConfig::addCodeGenPrepare
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
Definition: TargetPassConfig.cpp:939
llvm::AMDGPU::SIModeRegisterDefaults::DX10Clamp
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
Definition: AMDGPUBaseInfo.h:916
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SIInsertHardClausesID
char & SIInsertHardClausesID
Definition: SIInsertHardClauses.cpp:209
GCNMinRegSchedRegistry
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
llvm::AMDGPU::isFlatGlobalAddrSpace
bool isFlatGlobalAddrSpace(unsigned AS)
Definition: AMDGPU.h:429
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16InputDenormals
bool FP64FP16InputDenormals
If this is set, neither input or output denormals are flushed for both f64 and f16/v2f16 instructions...
Definition: AMDGPUBaseInfo.h:925
llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition: AMDGPUTargetInfo.cpp:25
llvm::initializeSIOptimizeExecMaskingPass
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
llvm::initializeSIPostRABundlerPass
void initializeSIPostRABundlerPass(PassRegistry &)
llvm::SIScheduleDAGMI
Definition: SIMachineScheduler.h:426
llvm::PassBuilder::registerPipelineParsingCallback
void registerPipelineParsingCallback(const std::function< bool(StringRef Name, CGSCCPassManager &, ArrayRef< PipelineElement >)> &C)
{{@ Register pipeline parsing callbacks with this pass builder instance.
Definition: PassBuilder.h:547
llvm::initializeAMDGPUAAWrapperPassPass
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
llvm::initializeAMDGPUCodeGenPreparePass
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
llvm::initializeGCNNSAReassignPass
void initializeGCNNSAReassignPass(PassRegistry &)
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::AMDGPUTargetMachine::EnableLowerModuleLDS
static bool EnableLowerModuleLDS
Definition: AMDGPUTargetMachine.h:38
llvm::yaml::StringValue
A wrapper around std::string which contains a source range that's being set during parsing.
Definition: MIRYamlMapping.h:34
llvm::GlobalDCEPass
Pass to remove unused function declarations.
Definition: GlobalDCE.h:29
llvm::PatchableFunctionID
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
Definition: PatchableFunction.cpp:96
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
IterativeGCNMaxOccupancySchedRegistry
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
AMDGPUExportClustering.h
llvm::AMDGPUFunctionArgInfo::WorkItemIDZ
ArgDescriptor WorkItemIDZ
Definition: AMDGPUArgumentUsageInfo.h:150
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::CodeGenOpt::None
@ None
Definition: CodeGen.h:53
llvm::createSIShrinkInstructionsPass
FunctionPass * createSIShrinkInstructionsPass()
llvm::createAMDGPUMachineCFGStructurizerPass
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
Definition: AMDGPUMachineCFGStructurizer.cpp:2886
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:95
EnableAMDGPUFunctionCallsOpt
static cl::opt< bool, true > EnableAMDGPUFunctionCallsOpt("amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true), cl::Hidden)
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:472
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:386
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::Constant::removeDeadConstantUsers
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:740
llvm::initializeSIFormMemoryClausesPass
void initializeSIFormMemoryClausesPass(PassRegistry &)
computeDataLayout
static StringRef computeDataLayout(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:471
llvm::Reloc::PIC_
@ PIC_
Definition: CodeGen.h:22
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::initializeAMDGPUExternalAAWrapperPass
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
AMDGPU.h
llvm::GCNTargetMachine::getTargetTransformInfo
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
Definition: AMDGPUTargetMachine.cpp:858
llvm::yaml::SIMachineFunctionInfo::StackPtrOffsetReg
StringValue StackPtrOffsetReg
Definition: SIMachineFunctionInfo.h:288
SimplifyLibCalls.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
GlobalDCE.h
llvm::yaml::SIMachineFunctionInfo::Mode
SIMode Mode
Definition: SIMachineFunctionInfo.h:291
llvm::getStandardCSEConfigForOpt
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
Definition: CSEInfo.cpp:74
llvm::createAMDGPURegBankCombiner
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
Definition: AMDGPURegBankCombiner.cpp:273
EnablePreRAOptimizations
static cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
IRTranslator.h
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition: TargetMachine.h:125
EarlyInlineAll
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::once_flag
std::once_flag once_flag
Definition: Threading.h:60
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::AMDGPUFunctionArgInfo::ImplicitBufferPtr
ArgDescriptor ImplicitBufferPtr
Definition: AMDGPUArgumentUsageInfo.h:144
llvm::SIWholeQuadModeID
char & SIWholeQuadModeID
Definition: SIWholeQuadMode.cpp:265
llvm::getEffectiveRelocModel
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
Definition: AVRTargetMachine.cpp:39
EnableSROA
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
llvm::initializeAMDGPULowerKernelAttributesPass
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
llvm::getEffectiveCodeModel
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value.
Definition: TargetMachine.h:473
llvm::LLVMTargetMachine::initAsmInfo
void initAsmInfo()
Definition: LLVMTargetMachine.cpp:41
llvm::initializeAMDGPUAnnotateUniformValuesPass
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
llvm::RenameIndependentSubregsID
char & RenameIndependentSubregsID
This pass detects subregister lanes in a virtual register that are used independently of other lanes ...
Definition: RenameIndependentSubregs.cpp:113
llvm::AMDGPUPrintfRuntimeBindingPass
Definition: AMDGPU.h:291
llvm::AMDGPUReplaceLDSUseWithPointerPass
Definition: AMDGPU.h:157
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::createStructurizeCFGPass
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
Definition: StructurizeCFG.cpp:1086
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: AMDGPUBaseInfo.h:926
llvm::GCNTargetMachine::createPassConfig
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
Definition: AMDGPUTargetMachine.cpp:1472
llvm::PassManager< Module >
llvm::createAMDGPULowerKernelAttributesPass
ModulePass * createAMDGPULowerKernelAttributesPass()
Definition: AMDGPULowerKernelAttributes.cpp:258
llvm::initializeSIFixSGPRCopiesPass
void initializeSIFixSGPRCopiesPass(PassRegistry &)
llvm::PerFunctionMIParsingState
Definition: MIParser.h:162
llvm::AMDGPUFunctionArgInfo::WorkGroupInfo
ArgDescriptor WorkGroupInfo
Definition: AMDGPUArgumentUsageInfo.h:136
llvm::createAMDGPUPromoteAllocaToVector
FunctionPass * createAMDGPUPromoteAllocaToVector()
Definition: AMDGPUPromoteAlloca.cpp:1149
llvm::R600TargetMachine::R600TargetMachine
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
Definition: AMDGPUTargetMachine.cpp:751
llvm::createR600VectorRegMerger
FunctionPass * createR600VectorRegMerger()
Definition: R600OptimizeVectorRegisters.cpp:385
llvm::initializeAMDGPULowerModuleLDSPass
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
LLVM_READNONE
#define LLVM_READNONE
Definition: Compiler.h:205
createIterativeILPMachineScheduler
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:435
llvm::parseNamedRegisterReference
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)
Definition: MIParser.cpp:3426
EnableEarlyIfConversion
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
llvm::initializeSIFixVGPRCopiesPass
void initializeSIFixVGPRCopiesPass(PassRegistry &)
llvm::yaml::SIMode::DX10Clamp
bool DX10Clamp
Definition: SIMachineFunctionInfo.h:232
llvm::initializeAMDGPUPromoteAllocaToVectorPass
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
EnableScalarIRPasses
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
EnableR600IfConvert
static cl::opt< bool > EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
llvm::initializeSIPreEmitPeepholePass
void initializeSIPreEmitPeepholePass(PassRegistry &)
createIterativeGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:422
llvm::call_once
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:90
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:588
llvm::createR600ControlFlowFinalizer
FunctionPass * createR600ControlFlowFinalizer()
Definition: R600ControlFlowFinalizer.cpp:689
llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks
void registerPassBuilderCallbacks(PassBuilder &PB) override
Allow the target to modify the pass pipeline with New Pass Manager (similar to adjustPassManager for ...
Definition: AMDGPUTargetMachine.cpp:620
llvm::TargetPassConfig::addMachineSSAOptimization
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.
Definition: TargetPassConfig.cpp:1235
llvm::createAMDGPUPropagateAttributesEarlyPass
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:405
llvm::AMDGPUPropagateAttributesEarlyPass
Definition: AMDGPU.h:133
llvm::initializeSIModeRegisterPass
void initializeSIModeRegisterPass(PassRegistry &)
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:385
llvm::createLoadClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1573
RegBankSelect.h
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
GCNMaxOccupancySchedRegistry
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
llvm::createAMDGPULowerKernelArgumentsPass
FunctionPass * createAMDGPULowerKernelArgumentsPass()
Definition: AMDGPULowerKernelArguments.cpp:249
llvm::AMDGPUTargetMachine::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
Definition: AMDGPUTargetMachine.cpp:794
llvm::PassManagerBuilder::EP_ModuleOptimizerEarly
@ EP_ModuleOptimizerEarly
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
Definition: PassManagerBuilder.h:76
llvm::createSIModeRegisterPass
FunctionPass * createSIModeRegisterPass()
Definition: SIModeRegister.cpp:157
llvm::OptimizationLevel
Definition: OptimizationLevel.h:22
llvm::ArgDescriptor::createRegister
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Definition: AMDGPUArgumentUsageInfo.h:44
PassManager.h
llvm::createInternalizePass
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
Definition: Internalize.cpp:315
llvm::SourceMgr::DK_Error
@ DK_Error
Definition: SourceMgr.h:34
llvm::createAMDGPUReplaceLDSUseWithPointerPass
ModulePass * createAMDGPUReplaceLDSUseWithPointerPass()
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:451
llvm::AMDGPUTargetMachine::adjustPassManager
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g.
Definition: AMDGPUTargetMachine.cpp:551
llvm::LLVMTargetMachine
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Definition: TargetMachine.h:385
llvm::DeadMachineInstructionElimID
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
Definition: DeadMachineInstructionElim.cpp:57
llvm::PerFunctionMIParsingState::MF
MachineFunction & MF
Definition: MIParser.h:164
GCNILPSchedRegistry
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
llvm::AnalysisManager::registerPass
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:831
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:381
llvm::AMDGPUFunctionArgInfo::KernargSegmentPtr
ArgDescriptor KernargSegmentPtr
Definition: AMDGPUArgumentUsageInfo.h:127
llvm::createAMDGPUPromoteAlloca
FunctionPass * createAMDGPUPromoteAlloca()
Definition: AMDGPUPromoteAlloca.cpp:1145
llvm::initializeAMDGPUPrintfRuntimeBindingPass
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
llvm::AAManager::registerFunctionAnalysis
void registerFunctionAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:1238
llvm::BranchRelaxationPassID
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
Definition: BranchRelaxation.cpp:119
llvm::initializeAMDGPUPreLegalizerCombinerPass
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
llvm::createAMDGPUCodeGenPreparePass
FunctionPass * createAMDGPUCodeGenPreparePass()
Definition: AMDGPUCodeGenPrepare.cpp:1445
llvm::RegisterRegAllocBase
RegisterRegAllocBase class - Track the registration of register allocators.
Definition: RegAllocRegistry.h:30
llvm::MachineSchedulerID
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Definition: MachineScheduler.cpp:210
llvm::AMDGPUTargetMachine::EnableFunctionCalls
static bool EnableFunctionCalls
Definition: AMDGPUTargetMachine.h:36
llvm::initializeAMDGPUAttributorPass
void initializeAMDGPUAttributorPass(PassRegistry &)
Legalizer.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::createLICMPass
Pass * createLICMPass()
Definition: LICM.cpp:336
llvm::createAMDGPUFixFunctionBitcastsPass
ModulePass * createAMDGPUFixFunctionBitcastsPass()
llvm::GCNNSAReassignID
char & GCNNSAReassignID
Definition: GCNNSAReassign.cpp:104
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition: TargetMachine.h:124
llvm::PassManagerBuilder::EP_EarlyAsPossible
@ EP_EarlyAsPossible
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations,...
Definition: PassManagerBuilder.h:72
llvm::initializeAMDGPUAnnotateKernelFeaturesPass
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
llvm::AMDGPUFunctionArgInfo::WorkItemIDY
ArgDescriptor WorkItemIDY
Definition: AMDGPUArgumentUsageInfo.h:149
llvm::createAMDGPUPreLegalizeCombiner
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPreLegalizerCombiner.cpp:298
llvm::AMDGPUTargetMachine::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const override
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition: AMDGPUTargetMachine.cpp:800
llvm::SMRange
Represents a range in source code.
Definition: SMLoc.h:48
N
#define N
llvm::createStraightLineStrengthReducePass
FunctionPass * createStraightLineStrengthReducePass()
Definition: StraightLineStrengthReduce.cpp:269
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::initializeAMDGPUFixFunctionBitcastsPass
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &)
llvm::GCNPreRAOptimizationsID
char & GCNPreRAOptimizationsID
Definition: GCNPreRAOptimizations.cpp:78
llvm::initializeSILoadStoreOptimizerPass
void initializeSILoadStoreOptimizerPass(PassRegistry &)
llvm::legacy::PassManagerBase
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
Definition: LegacyPassManager.h:39
llvm::IRTranslator
Definition: IRTranslator.h:62
llvm::PassBuilder::registerCGSCCOptimizerLateEPCallback
void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:471
llvm::initializeAMDGPURegBankCombinerPass
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
RegName
#define RegName(no)
llvm::createSIAnnotateControlFlowPass
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
Definition: SIAnnotateControlFlow.cpp:375
Vectorize.h
llvm::yaml::SIMode::IEEE
bool IEEE
Definition: SIMachineFunctionInfo.h:231
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::SIFoldOperandsID
char & SIFoldOperandsID
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::createBasicRegisterAllocator
FunctionPass * createBasicRegisterAllocator()
BasicRegisterAllocation Pass - This pass implements a degenerate global register allocator using the ...
Definition: RegAllocBasic.cpp:337
llvm::RegBankSelect
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition: RegBankSelect.h:91
llvm::EarlyMachineLICMID
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
Definition: MachineLICM.cpp:295
llvm::AMDGPUTargetMachine::getGPUName
StringRef getGPUName(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:530
llvm::cl::desc
Definition: CommandLine.h:414
llvm::ScheduleDAGMILive
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
Definition: MachineScheduler.h:385
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:119
llvm::PassManagerBuilder::EP_CGSCCOptimizerLate
@ EP_CGSCCOptimizerLate
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
Definition: PassManagerBuilder.h:117
llvm::createAMDGPUCFGStructurizerPass
FunctionPass * createAMDGPUCFGStructurizerPass()
Definition: AMDILCFGStructurizer.cpp:1654
llvm::PassManager::addPass
std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
Definition: PassManager.h:542
llvm::createR600ISelDag
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a R600-specific.
Definition: AMDGPUISelDAGToDAG.cpp:391
llvm::IfConverterID
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
Definition: IfConversion.cpp:436
llvm::CodeGenOpt::Less
@ Less
Definition: CodeGen.h:54
llvm::AMDGPUTargetMachine::AMDGPUTargetMachine
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
Definition: AMDGPUTargetMachine.cpp:504
llvm::TargetPassConfig::addFastRegAlloc
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
Definition: TargetPassConfig.cpp:1371
llvm::AMDGPUPerfHintAnalysisID
char & AMDGPUPerfHintAnalysisID
Definition: AMDGPUPerfHintAnalysis.cpp:58
TargetRegistry.h
llvm::createSROAPass
FunctionPass * createSROAPass()
Definition: SROA.cpp:4848
llvm::AMDGPUPropagateAttributesLatePass
Definition: AMDGPU.h:145
EnableLibCallSimplify
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
InitializePasses.h
llvm::yaml::SIMode::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: SIMachineFunctionInfo.h:236
llvm::SIOptimizeExecMaskingPreRAID
char & SIOptimizeExecMaskingPreRAID
Definition: SIOptimizeExecMaskingPreRA.cpp:75
llvm::createGCNMCRegisterInfo
MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)
Definition: AMDGPUMCTargetDesc.cpp:68
llvm::TargetMachine::MRI
std::unique_ptr< const MCRegisterInfo > MRI
Definition: TargetMachine.h:106
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
EnableAMDGPUFixedFunctionABIOpt
static cl::opt< bool, true > EnableAMDGPUFixedFunctionABIOpt("amdgpu-fixed-function-abi", cl::desc("Enable all implicit function arguments"), cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI), cl::init(false), cl::Hidden)
llvm::AMDGPUTargetMachine::EnableLateStructurizeCFG
static bool EnableLateStructurizeCFG
Definition: AMDGPUTargetMachine.h:35
llvm::TargetPassConfig::addILPOpts
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
Definition: TargetPassConfig.h:373
AMDGPUTargetMachine.h
llvm::GCNTargetMachine::createDefaultFuncInfoYAML
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
Definition: AMDGPUTargetMachine.cpp:1476
PassName
static const char PassName[]
Definition: X86LowerAMXIntrinsics.cpp:669
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:382
llvm::initializeSILowerControlFlowPass
void initializeSILowerControlFlowPass(PassRegistry &)
llvm::SILateBranchLoweringPassID
char & SILateBranchLoweringPassID
Definition: SILateBranchLowering.cpp:66
RegAllocRegistry.h
llvm::createAMDGPUSimplifyLibCallsPass
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
Definition: AMDGPULibCalls.cpp:1699
MIParser.h
llvm::Localizer
This pass implements the localization mechanism described at the top of this file.
Definition: Localizer.h:40
llvm::createAMDGPUMacroFusionDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
Definition: AMDGPUMacroFusion.cpp:62