LLVM  16.0.0git
AMDGPUTargetMachine.cpp
Go to the documentation of this file.
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for SI+ GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUExportClustering.h"
19 #include "AMDGPUIGroupLP.h"
20 #include "AMDGPUMacroFusion.h"
21 #include "AMDGPUTargetObjectFile.h"
23 #include "GCNIterativeScheduler.h"
24 #include "GCNSchedStrategy.h"
25 #include "GCNVOPDUtils.h"
26 #include "R600.h"
27 #include "R600TargetMachine.h"
28 #include "SIMachineFunctionInfo.h"
29 #include "SIMachineScheduler.h"
31 #include "Utils/AMDGPUBaseInfo.h"
40 #include "llvm/CodeGen/Passes.h"
43 #include "llvm/IR/IntrinsicsAMDGPU.h"
45 #include "llvm/IR/PassManager.h"
46 #include "llvm/IR/PatternMatch.h"
47 #include "llvm/InitializePasses.h"
48 #include "llvm/MC/TargetRegistry.h"
50 #include "llvm/Transforms/IPO.h"
54 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/Transforms/Utils.h"
60 #include <optional>
61 
62 using namespace llvm;
63 using namespace llvm::PatternMatch;
64 
65 namespace {
66 class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
67 public:
68  SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
69  : RegisterRegAllocBase(N, D, C) {}
70 };
71 
72 class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
73 public:
74  VGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
75  : RegisterRegAllocBase(N, D, C) {}
76 };
77 
78 static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
79  const TargetRegisterClass &RC) {
80  return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
81 }
82 
83 static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
84  const TargetRegisterClass &RC) {
85  return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
86 }
87 
88 
/// -{sgpr|vgpr}-regalloc=... command line option.
/// Sentinel factory: a nullptr return means "no allocator was explicitly
/// chosen on the command line"; createSGPRAllocPass/createVGPRAllocPass
/// compare against this function pointer and fall back to an -O-based choice.
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
91 
92 /// A dummy default pass factory indicates whether the register allocator is
93 /// overridden on the command line.
94 static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
95 static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
96 
97 static SGPRRegisterRegAlloc
98 defaultSGPRRegAlloc("default",
99  "pick SGPR register allocator based on -O option",
101 
102 static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,
104 SGPRRegAlloc("sgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
105  cl::desc("Register allocator to use for SGPRs"));
106 
107 static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
109 VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
110  cl::desc("Register allocator to use for VGPRs"));
111 
112 
113 static void initializeDefaultSGPRRegisterAllocatorOnce() {
114  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
115 
116  if (!Ctor) {
117  Ctor = SGPRRegAlloc;
118  SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);
119  }
120 }
121 
122 static void initializeDefaultVGPRRegisterAllocatorOnce() {
123  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
124 
125  if (!Ctor) {
126  Ctor = VGPRRegAlloc;
127  VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);
128  }
129 }
130 
// Factory functions wiring the generic basic/greedy/fast allocators to the
// SGPR-only / VGPR-only register-class filters above. These are the ctors
// registered for the -sgpr-regalloc / -vgpr-regalloc options below.

static FunctionPass *createBasicSGPRRegisterAllocator() {
  return createBasicRegisterAllocator(onlyAllocateSGPRs);
}

static FunctionPass *createGreedySGPRRegisterAllocator() {
  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
}

static FunctionPass *createFastSGPRRegisterAllocator() {
  // Second argument false: do not clear virtual-register mappings after this
  // run — presumably because VGPR allocation still has to follow; TODO
  // confirm against createFastRegisterAllocator's ClearVirtRegs parameter.
  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
}

static FunctionPass *createBasicVGPRRegisterAllocator() {
  return createBasicRegisterAllocator(onlyAllocateVGPRs);
}

static FunctionPass *createGreedyVGPRRegisterAllocator() {
  return createGreedyRegisterAllocator(onlyAllocateVGPRs);
}

static FunctionPass *createFastVGPRRegisterAllocator() {
  // Second argument true: VGPRs are allocated after SGPRs (see
  // addRegAssignAndRewriteFast below), so this is the final allocation run.
  return createFastRegisterAllocator(onlyAllocateVGPRs, true);
}
154 
// Static registrations making "basic", "greedy" and "fast" valid values for
// the -sgpr-regalloc and -vgpr-regalloc command line options.
static SGPRRegisterRegAlloc basicRegAllocSGPR(
  "basic", "basic register allocator", createBasicSGPRRegisterAllocator);
static SGPRRegisterRegAlloc greedyRegAllocSGPR(
  "greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);

static SGPRRegisterRegAlloc fastRegAllocSGPR(
  "fast", "fast register allocator", createFastSGPRRegisterAllocator);


static VGPRRegisterRegAlloc basicRegAllocVGPR(
  "basic", "basic register allocator", createBasicVGPRRegisterAllocator);
static VGPRRegisterRegAlloc greedyRegAllocVGPR(
  "greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);

static VGPRRegisterRegAlloc fastRegAllocVGPR(
  "fast", "fast register allocator", createFastVGPRRegisterAllocator);
171 }
172 
174  "amdgpu-sroa",
175  cl::desc("Run SROA after promote alloca pass"),
177  cl::init(true));
178 
// -amdgpu-early-ifcvt: run early if-conversion (off by default).
static cl::opt<bool>
EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
                        cl::desc("Run early if-conversion"),
                        cl::init(false));

// -amdgpu-opt-exec-mask-pre-ra: pre-RA exec mask optimizations (on by default).
static cl::opt<bool>
OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
                 cl::desc("Run pre-RA exec mask optimizations"),
                 cl::init(true));
188 
189 // Option to disable vectorizer for tests.
191  "amdgpu-load-store-vectorizer",
192  cl::desc("Enable load store vectorizer"),
193  cl::init(true),
194  cl::Hidden);
195 
196 // Option to control global loads scalarization
198  "amdgpu-scalarize-global-loads",
199  cl::desc("Enable global load scalarization"),
200  cl::init(true),
201  cl::Hidden);
202 
203 // Option to run internalize pass.
205  "amdgpu-internalize-symbols",
206  cl::desc("Enable elimination of non-kernel functions and unused globals"),
207  cl::init(false),
208  cl::Hidden);
209 
210 // Option to inline all early.
212  "amdgpu-early-inline-all",
213  cl::desc("Inline all functions early"),
214  cl::init(false),
215  cl::Hidden);
216 
218  "amdgpu-sdwa-peephole",
219  cl::desc("Enable SDWA peepholer"),
220  cl::init(true));
221 
223  "amdgpu-dpp-combine",
224  cl::desc("Enable DPP combiner"),
225  cl::init(true));
226 
// Enable address space based alias analysis (on by default; disable with
// -enable-amdgpu-aa=0).
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
  cl::desc("Enable AMDGPU Alias Analysis"),
  cl::init(true));
231 
232 // Option to run late CFG structurizer
234  "amdgpu-late-structurize",
235  cl::desc("Enable late CFG structurization"),
237  cl::Hidden);
238 
239 // Enable lib calls simplifications
241  "amdgpu-simplify-libcall",
242  cl::desc("Enable amdgpu library simplifications"),
243  cl::init(true),
244  cl::Hidden);
245 
247  "amdgpu-ir-lower-kernel-arguments",
248  cl::desc("Lower kernel argument loads in IR pass"),
249  cl::init(true),
250  cl::Hidden);
251 
253  "amdgpu-reassign-regs",
254  cl::desc("Enable register reassign optimizations on gfx10+"),
255  cl::init(true),
256  cl::Hidden);
257 
259  "amdgpu-opt-vgpr-liverange",
260  cl::desc("Enable VGPR liverange optimizations for if-else structure"),
261  cl::init(true), cl::Hidden);
262 
263 // Enable atomic optimization
265  "amdgpu-atomic-optimizations",
266  cl::desc("Enable atomic optimizations"),
267  cl::init(false),
268  cl::Hidden);
269 
270 // Enable Mode register optimization
272  "amdgpu-mode-register",
273  cl::desc("Enable mode register pass"),
274  cl::init(true),
275  cl::Hidden);
276 
// Enable GFX11+ s_delay_alu insertion (on by default).
static cl::opt<bool>
    EnableInsertDelayAlu("amdgpu-enable-delay-alu",
                         cl::desc("Enable s_delay_alu insertion"),
                         cl::init(true), cl::Hidden);

// Enable GFX11+ VOPD (on by default).
static cl::opt<bool>
    EnableVOPD("amdgpu-enable-vopd",
               cl::desc("Enable VOPD, dual issue of VALU in wave32"),
               cl::init(true), cl::Hidden);
288 
// Option is used in lit tests to prevent deadcoding of patterns inspected.
// (Setting -amdgpu-dce-in-ra=0 keeps otherwise-dead instructions alive.)
static cl::opt<bool>
EnableDCEInRA("amdgpu-dce-in-ra",
              cl::init(true), cl::Hidden,
              cl::desc("Enable machine DCE inside regalloc"));

// -amdgpu-set-wave-priority: adjust wave priority (off by default).
static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
                                           cl::desc("Adjust wave priority"),
                                           cl::init(false), cl::Hidden);
298 
300  "amdgpu-scalar-ir-passes",
301  cl::desc("Enable scalar IR passes"),
302  cl::init(true),
303  cl::Hidden);
304 
306  "amdgpu-enable-structurizer-workarounds",
307  cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
308  cl::Hidden);
309 
311  "amdgpu-enable-lds-replace-with-pointer",
312  cl::desc("Enable LDS replace with pointer pass"), cl::init(false),
313  cl::Hidden);
314 
316  "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
318  cl::Hidden);
319 
321  "amdgpu-enable-pre-ra-optimizations",
322  cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
323  cl::Hidden);
324 
326  "amdgpu-enable-promote-kernel-arguments",
327  cl::desc("Enable promotion of flat kernel pointer arguments to global"),
328  cl::Hidden, cl::init(true));
329 
331  "amdgpu-enable-max-ilp-scheduling-strategy",
332  cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
333  cl::Hidden, cl::init(false));
334 
336  // Register the target
339 
410 }
411 
/// Create the lowering object-file info for this target. The triple parameter
/// is part of the expected factory signature but is not consulted: every
/// AMDGPU triple uses AMDGPUTargetObjectFile.
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  return std::make_unique<AMDGPUTargetObjectFile>();
}
415 
417  return new SIScheduleDAGMI(C);
418 }
419 
420 static ScheduleDAGInstrs *
422  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
423  ScheduleDAGMILive *DAG =
424  new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
425  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
426  if (ST.shouldClusterStores())
427  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
428  DAG->addMutation(createIGroupLPDAGMutation());
429  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
430  DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
431  return DAG;
432 }
433 
434 static ScheduleDAGInstrs *
436  ScheduleDAGMILive *DAG =
437  new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxILPSchedStrategy>(C));
439  return DAG;
440 }
441 
442 static ScheduleDAGInstrs *
444  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
445  auto DAG = new GCNIterativeScheduler(C,
447  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
448  if (ST.shouldClusterStores())
449  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
450  return DAG;
451 }
452 
454  return new GCNIterativeScheduler(C,
456 }
457 
458 static ScheduleDAGInstrs *
460  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
461  auto DAG = new GCNIterativeScheduler(C,
463  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
464  if (ST.shouldClusterStores())
465  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
466  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
467  return DAG;
468 }
469 
471 SISchedRegistry("si", "Run SI's custom scheduler",
473 
475 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
476  "Run GCN scheduler to maximize occupancy",
478 
480  GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp",
482 
484  "gcn-iterative-max-occupancy-experimental",
485  "Run GCN scheduler to maximize occupancy (experimental)",
487 
489  "gcn-iterative-minreg",
490  "Run GCN iterative scheduler for minimal register usage (experimental)",
492 
494  "gcn-iterative-ilp",
495  "Run GCN iterative scheduler for ILP scheduling (experimental)",
497 
498 static StringRef computeDataLayout(const Triple &TT) {
499  if (TT.getArch() == Triple::r600) {
500  // 32-bit pointers.
501  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
502  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
503  }
504 
505  // 32-bit private, local, and region pointers. 64-bit global, constant and
506  // flat, non-integral buffer fat pointers.
507  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
508  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
509  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
510  "-ni:7";
511 }
512 
514 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
515  if (!GPU.empty())
516  return GPU;
517 
518  // Need to default to a target with flat support for HSA.
519  if (TT.getArch() == Triple::amdgcn)
520  return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
521 
522  return "r600";
523 }
524 
/// Pick the relocation model. The requested model RM is deliberately ignored.
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
  // The AMDGPU toolchain only supports generating shared objects, so we
  // must always use PIC.
  return Reloc::PIC_;
}
530 
532  StringRef CPU, StringRef FS,
534  std::optional<Reloc::Model> RM,
535  std::optional<CodeModel::Model> CM,
536  CodeGenOpt::Level OptLevel)
539  getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
540  TLOF(createTLOF(getTargetTriple())) {
541  initAsmInfo();
542  if (TT.getArch() == Triple::amdgcn) {
543  if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize64"))
545  else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
547  }
548 }
549 
553 
555 
557  Attribute GPUAttr = F.getFnAttribute("target-cpu");
558  return GPUAttr.isValid() ? GPUAttr.getValueAsString() : getTargetCPU();
559 }
560 
562  Attribute FSAttr = F.getFnAttribute("target-features");
563 
564  return FSAttr.isValid() ? FSAttr.getValueAsString()
566 }
567 
568 /// Predicate for Internalize pass.
569 static bool mustPreserveGV(const GlobalValue &GV) {
570  if (const Function *F = dyn_cast<Function>(&GV))
571  return F->isDeclaration() || F->getName().startswith("__asan_") ||
572  F->getName().startswith("__sanitizer_") ||
573  AMDGPU::isEntryFunctionCC(F->getCallingConv());
574 
576  return !GV.use_empty();
577 }
578 
581 }
582 
587  if (PassName == "amdgpu-propagate-attributes-late") {
589  return true;
590  }
591  if (PassName == "amdgpu-unify-metadata") {
593  return true;
594  }
595  if (PassName == "amdgpu-printf-runtime-binding") {
597  return true;
598  }
599  if (PassName == "amdgpu-always-inline") {
601  return true;
602  }
603  if (PassName == "amdgpu-replace-lds-use-with-pointer") {
605  return true;
606  }
607  if (PassName == "amdgpu-lower-module-lds") {
609  return true;
610  }
611  return false;
612  });
616  if (PassName == "amdgpu-simplifylib") {
618  return true;
619  }
620  if (PassName == "amdgpu-usenative") {
622  return true;
623  }
624  if (PassName == "amdgpu-promote-alloca") {
625  PM.addPass(AMDGPUPromoteAllocaPass(*this));
626  return true;
627  }
628  if (PassName == "amdgpu-promote-alloca-to-vector") {
630  return true;
631  }
632  if (PassName == "amdgpu-lower-kernel-attributes") {
634  return true;
635  }
636  if (PassName == "amdgpu-propagate-attributes-early") {
638  return true;
639  }
640  if (PassName == "amdgpu-promote-kernel-arguments") {
642  return true;
643  }
644  return false;
645  });
646 
648  FAM.registerPass([&] { return AMDGPUAA(); });
649  });
650 
651  PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
652  if (AAName == "amdgpu-aa") {
654  return true;
655  }
656  return false;
657  });
658 
660  [this](ModulePassManager &PM, OptimizationLevel Level) {
667  });
668 
670  [this](ModulePassManager &PM, OptimizationLevel Level) {
671  if (Level == OptimizationLevel::O0)
672  return;
673 
676 
677  if (InternalizeSymbols) {
679  }
681  if (InternalizeSymbols) {
682  PM.addPass(GlobalDCEPass());
683  }
686  });
687 
689  [this](CGSCCPassManager &PM, OptimizationLevel Level) {
690  if (Level == OptimizationLevel::O0)
691  return;
692 
694 
695  // Add promote kernel arguments pass to the opt pipeline right before
696  // infer address spaces which is needed to do actual address space
697  // rewriting.
698  if (Level.getSpeedupLevel() > OptimizationLevel::O1.getSpeedupLevel() &&
701 
702  // Add infer address spaces pass to the opt pipeline after inlining
703  // but before SROA to increase SROA opportunities.
705 
706  // This should run after inlining to have any chance of doing
707  // anything, and before other cleanup optimizations.
709 
710  if (Level != OptimizationLevel::O0) {
711  // Promote alloca to vector before SROA and loop unroll. If we
712  // manage to eliminate allocas before unroll we may choose to unroll
713  // less.
715  }
716 
718  });
719 }
720 
721 int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
722  return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
723  AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
724  AddrSpace == AMDGPUAS::REGION_ADDRESS)
725  ? -1
726  : 0;
727 }
728 
730  unsigned DestAS) const {
731  return AMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
733 }
734 
736  const auto *LD = dyn_cast<LoadInst>(V);
737  if (!LD)
739 
740  // It must be a generic pointer loaded.
741  assert(V->getType()->isPointerTy() &&
743 
744  const auto *Ptr = LD->getPointerOperand();
745  if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
747  // For a generic pointer loaded from the constant memory, it could be assumed
748  // as a global pointer since the constant memory is only populated on the
749  // host side. As implied by the offload programming model, only global
750  // pointers could be referenced on the host side.
752 }
753 
754 std::pair<const Value *, unsigned>
756  if (auto *II = dyn_cast<IntrinsicInst>(V)) {
757  switch (II->getIntrinsicID()) {
758  case Intrinsic::amdgcn_is_shared:
759  return std::make_pair(II->getArgOperand(0), AMDGPUAS::LOCAL_ADDRESS);
760  case Intrinsic::amdgcn_is_private:
761  return std::make_pair(II->getArgOperand(0), AMDGPUAS::PRIVATE_ADDRESS);
762  default:
763  break;
764  }
765  return std::make_pair(nullptr, -1);
766  }
767  // Check the global pointer predication based on
768  // (!is_share(p) && !is_private(p)). Note that logic 'and' is commutative and
769  // the order of 'is_shared' and 'is_private' is not significant.
770  Value *Ptr;
771  if (match(
772  const_cast<Value *>(V),
773  m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
774  m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
775  m_Deferred(Ptr))))))
776  return std::make_pair(Ptr, AMDGPUAS::GLOBAL_ADDRESS);
777 
778  return std::make_pair(nullptr, -1);
779 }
780 
781 unsigned
783  switch (Kind) {
793  }
794  return AMDGPUAS::FLAT_ADDRESS;
795 }
796 
797 //===----------------------------------------------------------------------===//
798 // GCN Target Machine (SI+)
799 //===----------------------------------------------------------------------===//
800 
802  StringRef CPU, StringRef FS,
804  std::optional<Reloc::Model> RM,
805  std::optional<CodeModel::Model> CM,
806  CodeGenOpt::Level OL, bool JIT)
807  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
808 
809 const TargetSubtargetInfo *
811  StringRef GPU = getGPUName(F);
813 
814  SmallString<128> SubtargetKey(GPU);
815  SubtargetKey.append(FS);
816 
817  auto &I = SubtargetMap[SubtargetKey];
818  if (!I) {
819  // This needs to be done before we create a new subtarget since any
820  // creation will depend on the TM and the code generation flags on the
821  // function that reside in TargetOptions.
823  I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
824  }
825 
826  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
827 
828  return I.get();
829 }
830 
833  return TargetTransformInfo(GCNTTIImpl(this, F));
834 }
835 
836 //===----------------------------------------------------------------------===//
837 // AMDGPU Pass Setup
838 //===----------------------------------------------------------------------===//
839 
840 std::unique_ptr<CSEConfigBase> llvm::AMDGPUPassConfig::getCSEConfig() const {
842 }
843 
844 namespace {
845 
846 class GCNPassConfig final : public AMDGPUPassConfig {
847 public:
848  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
849  : AMDGPUPassConfig(TM, PM) {
850  // It is necessary to know the register usage of the entire call graph. We
851  // allow calls without EnableAMDGPUFunctionCalls if they are marked
852  // noinline, so this is always required.
853  setRequiresCodeGenSCCOrder(true);
854  substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
855  }
856 
857  GCNTargetMachine &getGCNTargetMachine() const {
858  return getTM<GCNTargetMachine>();
859  }
860 
862  createMachineScheduler(MachineSchedContext *C) const override;
863 
865  createPostMachineScheduler(MachineSchedContext *C) const override {
867  C, std::make_unique<PostGenericScheduler>(C),
868  /*RemoveKillFlags=*/true);
869  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
871  if (ST.shouldClusterStores())
873  DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
875  if (isPassEnabled(EnableVOPD, CodeGenOpt::Less))
877  return DAG;
878  }
879 
880  bool addPreISel() override;
881  void addMachineSSAOptimization() override;
882  bool addILPOpts() override;
883  bool addInstSelector() override;
884  bool addIRTranslator() override;
885  void addPreLegalizeMachineIR() override;
886  bool addLegalizeMachineIR() override;
887  void addPreRegBankSelect() override;
888  bool addRegBankSelect() override;
889  void addPreGlobalInstructionSelect() override;
890  bool addGlobalInstructionSelect() override;
891  void addFastRegAlloc() override;
892  void addOptimizedRegAlloc() override;
893 
894  FunctionPass *createSGPRAllocPass(bool Optimized);
895  FunctionPass *createVGPRAllocPass(bool Optimized);
896  FunctionPass *createRegAllocPass(bool Optimized) override;
897 
898  bool addRegAssignAndRewriteFast() override;
899  bool addRegAssignAndRewriteOptimized() override;
900 
901  void addPreRegAlloc() override;
902  bool addPreRewrite() override;
903  void addPostRegAlloc() override;
904  void addPreSched2() override;
905  void addPreEmitPass() override;
906 };
907 
908 } // end anonymous namespace
909 
911  : TargetPassConfig(TM, PM) {
912  // Exceptions and StackMaps are not supported, so these passes will never do
913  // anything.
916  // Garbage collection is not supported.
919 }
920 
924  else
926 }
927 
931  // ReassociateGEPs exposes more opportunities for SLSR. See
932  // the example in reassociate-geps-and-slsr.ll.
934  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
935  // EarlyCSE can reuse.
937  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
939  // NaryReassociate on GEPs creates redundant common expressions, so run
940  // EarlyCSE after it.
942 }
943 
946 
947  // There is no reason to run these.
951 
954 
955  // A call to propagate attributes pass in the backend in case opt was not run.
957 
959 
960  // Function calls are not supported, so make sure we inline everything.
963  // We need to add the barrier noop pass, otherwise adding the function
964  // inlining pass will cause all of the PassConfigs passes to be run
965  // one function at a time, which means if we have a module with two
966  // functions, then we will generate code for the first function
967  // without ever running any passes on the second.
969 
970  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
973 
974  // Replace OpenCL enqueued block function pointers with global variables.
976 
977  // Can increase LDS used by kernel so runs before PromoteAlloca
978  if (EnableLowerModuleLDS) {
979  // The pass "amdgpu-replace-lds-use-with-pointer" need to be run before the
980  // pass "amdgpu-lower-module-lds", and also it required to be run only if
981  // "amdgpu-lower-module-lds" pass is enabled.
984 
986  }
987 
990 
992 
993  if (TM.getOptLevel() > CodeGenOpt::None) {
995 
996  if (EnableSROA)
1000 
1004  AAResults &AAR) {
1005  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
1006  AAR.addAAResult(WrapperPass->getResult());
1007  }));
1008  }
1009 
1011  // TODO: May want to move later or split into an early and late one.
1013  }
1014  }
1015 
1017 
1018  // EarlyCSE is not always strong enough to clean up what LSR produces. For
1019  // example, GVN can combine
1020  //
1021  // %0 = add %a, %b
1022  // %1 = add %b, %a
1023  //
1024  // and
1025  //
1026  // %0 = shl nsw %a, 2
1027  // %1 = shl %a, 2
1028  //
1029  // but EarlyCSE can do neither of them.
1032 }
1033 
1035  if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
1037 
1038  // FIXME: This pass adds 2 hacky attributes that can be replaced with an
1039  // analysis, and should be removed.
1041  }
1042 
1043  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
1046 
1048 
1051 
1052  // LowerSwitch pass may introduce unreachable blocks that can
1053  // cause unexpected behavior for subsequent passes. Placing it
1054  // here seems better that these blocks would get cleaned up by
1055  // UnreachableBlockElim inserted next in the pass flow.
1057 }
1058 
1060  if (TM->getOptLevel() > CodeGenOpt::None)
1062  return false;
1063 }
1064 
1067  return false;
1068 }
1069 
1071  // Do nothing. GC is not supported.
1072  return false;
1073 }
1074 
1077  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1079  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
1080  if (ST.shouldClusterStores())
1081  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
1082  return DAG;
1083 }
1084 
1085 //===----------------------------------------------------------------------===//
1086 // GCN Pass Setup
1087 //===----------------------------------------------------------------------===//
1088 
1089 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
1090  MachineSchedContext *C) const {
1091  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1092  if (ST.enableSIScheduler())
1093  return createSIMachineScheduler(C);
1094 
1097 
1099 }
1100 
1101 bool GCNPassConfig::addPreISel() {
1103 
1104  if (TM->getOptLevel() > CodeGenOpt::None)
1106 
1107  if (isPassEnabled(EnableAtomicOptimizations, CodeGenOpt::Less)) {
1109  }
1110 
1111  if (TM->getOptLevel() > CodeGenOpt::None)
1112  addPass(createSinkingPass());
1113 
1114  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
1115  // regions formed by them.
1117  if (!LateCFGStructurize) {
1119  addPass(createFixIrreduciblePass());
1120  addPass(createUnifyLoopExitsPass());
1121  }
1122  addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions
1123  }
1125  if (!LateCFGStructurize) {
1127  // TODO: Move this right after structurizeCFG to avoid extra divergence
1128  // analysis. This depends on stopping SIAnnotateControlFlow from making
1129  // control flow modifications.
1131  }
1132  addPass(createLCSSAPass());
1133 
1134  if (TM->getOptLevel() > CodeGenOpt::Less)
1135  addPass(&AMDGPUPerfHintAnalysisID);
1136 
1137  return false;
1138 }
1139 
1140 void GCNPassConfig::addMachineSSAOptimization() {
1142 
1143  // We want to fold operands after PeepholeOptimizer has run (or as part of
1144  // it), because it will eliminate extra copies making it easier to fold the
1145  // real source operand. We want to eliminate dead instructions after, so that
1146  // we see fewer uses of the copies. We then need to clean up the dead
1147  // instructions leftover after the operands are folded as well.
1148  //
1149  // XXX - Can we get away without running DeadMachineInstructionElim again?
1150  addPass(&SIFoldOperandsID);
1151  if (EnableDPPCombine)
1152  addPass(&GCNDPPCombineID);
1153  addPass(&SILoadStoreOptimizerID);
1154  if (isPassEnabled(EnableSDWAPeephole)) {
1155  addPass(&SIPeepholeSDWAID);
1156  addPass(&EarlyMachineLICMID);
1157  addPass(&MachineCSEID);
1158  addPass(&SIFoldOperandsID);
1159  }
1160  addPass(&DeadMachineInstructionElimID);
1161  addPass(createSIShrinkInstructionsPass());
1162 }
1163 
1164 bool GCNPassConfig::addILPOpts() {
1166  addPass(&EarlyIfConverterID);
1167 
1169  return false;
1170 }
1171 
1172 bool GCNPassConfig::addInstSelector() {
1174  addPass(&SIFixSGPRCopiesID);
1175  addPass(createSILowerI1CopiesPass());
1176  return false;
1177 }
1178 
// GlobalISel step 1: translate LLVM IR into generic machine IR.
bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator(getOptLevel()));
  return false;
}
1183 
1184 void GCNPassConfig::addPreLegalizeMachineIR() {
1185  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1186  addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
1187  addPass(new Localizer());
1188 }
1189 
// GlobalISel step 2: legalize generic machine IR for this target.
bool GCNPassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}
1194 
1195 void GCNPassConfig::addPreRegBankSelect() {
1196  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1197  addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
1198 }
1199 
// GlobalISel step 3: assign generic virtual registers to register banks.
bool GCNPassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}
1204 
1205 void GCNPassConfig::addPreGlobalInstructionSelect() {
1206  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1207  addPass(createAMDGPURegBankCombiner(IsOptNone));
1208 }
1209 
// GlobalISel step 4: select target instructions.
bool GCNPassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect(getOptLevel()));
  return false;
}
1214 
1215 void GCNPassConfig::addPreRegAlloc() {
1216  if (LateCFGStructurize) {
1218  }
1219 }
1220 
1221 void GCNPassConfig::addFastRegAlloc() {
1222  // FIXME: We have to disable the verifier here because of PHIElimination +
1223  // TwoAddressInstructions disabling it.
1224 
1225  // This must be run immediately after phi elimination and before
1226  // TwoAddressInstructions, otherwise the processing of the tied operand of
1227  // SI_ELSE will introduce a copy of the tied operand source after the else.
1228  insertPass(&PHIEliminationID, &SILowerControlFlowID);
1229 
1232 
1234 }
1235 
1236 void GCNPassConfig::addOptimizedRegAlloc() {
1237  // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
1238  // instructions that cause scheduling barriers.
1239  insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
1241 
1242  if (OptExecMaskPreRA)
1244 
1245  if (isPassEnabled(EnablePreRAOptimizations))
1247 
1248  // This is not an essential optimization and it has a noticeable impact on
1249  // compilation time, so we only enable it from O2.
1250  if (TM->getOptLevel() > CodeGenOpt::Less)
1252 
1253  // FIXME: when an instruction has a Killed operand, and the instruction is
1254  // inside a bundle, seems only the BUNDLE instruction appears as the Kills of
1255  // the register in LiveVariables, this would trigger a failure in verifier,
1256  // we should fix it and enable the verifier.
1257  if (OptVGPRLiveRange)
1259  // This must be run immediately after phi elimination and before
1260  // TwoAddressInstructions, otherwise the processing of the tied operand of
1261  // SI_ELSE will introduce a copy of the tied operand source after the else.
1262  insertPass(&PHIEliminationID, &SILowerControlFlowID);
1263 
1264  if (EnableDCEInRA)
1266 
1268 }
1269 
// Hook between register assignment and virtual-register rewriting.
bool GCNPassConfig::addPreRewrite() {
  // NSA reassignment only runs when register reassignment optimizations are
  // enabled (-amdgpu-reassign-regs).
  if (EnableRegReassign)
    addPass(&GCNNSAReassignID);
  // NOTE(review): the `true` return presumably signals that rewriting should
  // still be run — confirm against TargetPassConfig::addPreRewrite's contract.
  return true;
}
1275 
1276 FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
1277  // Initialize the global default.
1278  llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
1279  initializeDefaultSGPRRegisterAllocatorOnce);
1280 
1281  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
1282  if (Ctor != useDefaultRegisterAllocator)
1283  return Ctor();
1284 
1285  if (Optimized)
1286  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
1287 
1288  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
1289 }
1290 
1291 FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1292  // Initialize the global default.
1293  llvm::call_once(InitializeDefaultVGPRRegisterAllocatorFlag,
1294  initializeDefaultVGPRRegisterAllocatorOnce);
1295 
1296  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
1297  if (Ctor != useDefaultRegisterAllocator)
1298  return Ctor();
1299 
1300  if (Optimized)
1301  return createGreedyVGPRRegisterAllocator();
1302 
1303  return createFastVGPRRegisterAllocator();
1304 }
1305 
// The generic single-allocator hook is deliberately unsupported on GCN:
// allocation is always split into separate SGPR and VGPR passes (see
// createSGPRAllocPass / createVGPRAllocPass above), so reaching this is a
// pipeline-construction bug.
1306 FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
1307  llvm_unreachable("should not be used");
1308 }
1309 
// Diagnostic text used when the user passes a plain -regalloc override:
// amdgcn requires the split -sgpr-regalloc / -vgpr-regalloc options instead.
1310 static const char RegAllocOptNotSupportedMessage[] =
1311  "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
1312 
// Fast (-O0) register assignment: allocate SGPRs first, lower SGPR spills
// (the SGPR analogue of prologue/epilogue insertion), then allocate VGPRs.
// NOTE(review): the statement body of the 'if' below (original line 1315) is
// not visible in this extraction; presumably it rejects a non-default
// -regalloc using RegAllocOptNotSupportedMessage — confirm against the
// original source.
1313 bool GCNPassConfig::addRegAssignAndRewriteFast() {
1314  if (!usingDefaultRegAlloc())
1316 
1317  addPass(createSGPRAllocPass(false));
1318 
1319  // Equivalent of PEI for SGPRs.
1320  addPass(&SILowerSGPRSpillsID);
1321 
1322  addPass(createVGPRAllocPass(false));
1323  return true;
1324 }
1325 
// Optimized register assignment: SGPR allocation, an explicit rewrite of the
// allocated SGPRs, SGPR spill lowering, then VGPR allocation followed by the
// pre-rewrite hook and the shared virtual-register rewriter.
// NOTE(review): the statement body of the 'if' below (original line 1328) is
// not visible in this extraction; presumably it rejects a non-default
// -regalloc using RegAllocOptNotSupportedMessage — confirm against the
// original source.
1326 bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1327  if (!usingDefaultRegAlloc())
1329 
1330  addPass(createSGPRAllocPass(true));
1331 
1332  // Commit allocated register changes. This is mostly necessary because too
1333  // many things rely on the use lists of the physical registers, such as the
1334  // verifier. This is only necessary with allocators which use LiveIntervals,
1335  // since FastRegAlloc does the replacements itself.
  // 'false' = do not clear virtual registers: VGPR allocation still follows.
1336  addPass(createVirtRegRewriter(false));
1337 
1338  // Equivalent of PEI for SGPRs.
1339  addPass(&SILowerSGPRSpillsID);
1340 
1341  addPass(createVGPRAllocPass(true));
1342 
  // addPreRewrite() schedules GCNNSAReassign (when enabled) before the final
  // rewrite.
1343  addPreRewrite();
1344  addPass(&VirtRegRewriterID);
1345 
1346  return true;
1347 }
1348 
// Post-register-allocation fixups: repair VGPR copies produced during
// allocation, and clean up exec-mask manipulation when optimizing.
// NOTE(review): original line 1353 is not visible in this extraction;
// presumably it chains to the base-class addPostRegAlloc() — confirm.
1349 void GCNPassConfig::addPostRegAlloc() {
1350  addPass(&SIFixVGPRCopiesID);
1351  if (getOptLevel() > CodeGenOpt::None)
1352  addPass(&SIOptimizeExecMaskingID);
1354 }
1355 
1356 void GCNPassConfig::addPreSched2() {
1357  if (TM->getOptLevel() > CodeGenOpt::None)
1358  addPass(createSIShrinkInstructionsPass());
1359  addPass(&SIPostRABundlerID);
1360 }
1361 
// Final machine passes before instruction emission: VOPD formation (when
// enabled), memory legalization, waitcnt insertion, mode-register setup,
// hard-clause insertion, late branch lowering, pre-emit peephole, a
// stand-alone hazard recognizer, VGPR release / delay-ALU insertion, and
// branch relaxation last.
// NOTE(review): the statement guarded by the EnableSetWavePriority check
// (original line 1375) is not visible in this extraction — confirm against
// the original source.
1362 void GCNPassConfig::addPreEmitPass() {
1363  if (isPassEnabled(EnableVOPD, CodeGenOpt::Less))
1364  addPass(&GCNCreateVOPDID);
1365  addPass(createSIMemoryLegalizerPass());
1366  addPass(createSIInsertWaitcntsPass());
1367 
1368  addPass(createSIModeRegisterPass());
1369 
1370  if (getOptLevel() > CodeGenOpt::None)
1371  addPass(&SIInsertHardClausesID);
1372 
1373  addPass(&SILateBranchLoweringPassID);
1374  if (isPassEnabled(EnableSetWavePriority, CodeGenOpt::Less))
1376  if (getOptLevel() > CodeGenOpt::None)
1377  addPass(&SIPreEmitPeepholeID);
1378  // The hazard recognizer that runs as part of the post-ra scheduler does not
1379  // guarantee to be able handle all hazards correctly. This is because if there
1380  // are multiple scheduling regions in a basic block, the regions are scheduled
1381  // bottom up, so when we begin to schedule a region we don't know what
1382  // instructions were emitted directly before it.
1383  //
1384  // Here we add a stand-alone hazard recognizer pass which can handle all
1385  // cases.
1386  addPass(&PostRAHazardRecognizerID);
1387 
1388  if (getOptLevel() > CodeGenOpt::Less)
1389  addPass(&AMDGPUReleaseVGPRsID);
1390 
1391  if (isPassEnabled(EnableInsertDelayAlu, CodeGenOpt::Less))
1392  addPass(&AMDGPUInsertDelayAluID);
1393 
1394  addPass(&BranchRelaxationPassID);
1395 }
1396 
// NOTE(review): the signature line of this definition (original line 1397)
// is not visible in this extraction; per the surrounding index this is
// GCNTargetMachine's createPassConfig returning the GCN pass pipeline.
1398  return new GCNPassConfig(*this, PM);
1399 }
1400 
// NOTE(review): the signature line of this definition (original line 1401)
// is not visible in this extraction; it returns a default-constructed
// SI-specific YAML MachineFunctionInfo.
1402  return new yaml::SIMachineFunctionInfo();
1403 }
1404 
// NOTE(review): the signature and MFI-lookup lines of this definition
// (original lines 1405-1407) are not visible in this extraction; per the
// surrounding index this is convertFuncInfoToYAML, serializing the
// function's SIMachineFunctionInfo to its YAML representation.
1408  return new yaml::SIMachineFunctionInfo(
1409  *MFI, *MF.getSubtarget().getRegisterInfo(), MF);
1410 }
1411 
// Deserialize the SI-specific MachineFunctionInfo from its MIR YAML form.
// Returns true on error (with Error/SourceRange populated), false on success.
// NOTE(review): this is a fragment — the signature's first line (original
// lines 1412-1413), the MFI lookup (line 1418), the register-argument
// assignment inside parseAndCheckArgument (line 1504), and the FP64/FP16
// denormal-mode copies (lines 1576-1577) are not visible in this extraction.
1414  SMDiagnostic &Error, SMRange &SourceRange) const {
1415  const yaml::SIMachineFunctionInfo &YamlMFI =
1416  static_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
1417  MachineFunction &MF = PFS.MF;
1419 
  // Base fields (stack info, properties, etc.) first; bail on failure.
1420  if (MFI->initializeBaseYamlFields(YamlMFI, MF, PFS, Error, SourceRange))
1421  return true;
1422 
1423  if (MFI->Occupancy == 0) {
1424  // Fixup the subtarget dependent default value.
1425  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1426  MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
1427  }
1428 
  // Resolve a named-register string to a Register; on failure point the
  // diagnostic at the YAML source range of the name.
1429  auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
1430  Register TempReg;
1431  if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
1432  SourceRange = RegName.SourceRange;
1433  return true;
1434  }
1435  RegVal = TempReg;
1436 
1437  return false;
1438  };
1439 
  // An empty string means the field was absent; that is not an error.
1440  auto parseOptionalRegister = [&](const yaml::StringValue &RegName,
1441  Register &RegVal) {
1442  return !RegName.Value.empty() && parseRegister(RegName, RegVal);
1443  };
1444 
1445  if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
1446  return true;
1447 
  // Emit a "wrong register class" diagnostic anchored at the register string
  // literal in the main buffer; always returns true (error).
1448  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
1449  // Create a diagnostic for the register string literal.
1450  const MemoryBuffer &Buffer =
1451  *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
1452  Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1453  RegName.Value.size(), SourceMgr::DK_Error,
1454  "incorrect register class for field", RegName.Value,
1455  std::nullopt, std::nullopt);
1456  SourceRange = RegName.SourceRange;
1457  return true;
1458  };
1459 
1460  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1461  parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1462  parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1463  return true;
1464 
  // Validate register classes; the sentinel pseudo-registers
  // (PRIVATE_RSRC_REG / FP_REG / SP_REG) are exempt from the class check.
1465  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1466  !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
1467  return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1468  }
1469 
1470  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1471  !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1472  return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1473  }
1474 
1475  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1476  !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1477  return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1478  }
1479 
  // Reserve each whole-wave-mode register named in the YAML.
1480  for (const auto &YamlReg : YamlMFI.WWMReservedRegs) {
1481  Register ParsedReg;
1482  if (parseRegister(YamlReg, ParsedReg))
1483  return true;
1484 
1485  MFI->reserveWWMRegister(ParsedReg);
1486  }
1487 
  // Parse one optional argument descriptor (register or stack slot), verify
  // the register class, apply the optional mask, and account for the SGPRs
  // the argument consumes. Returns true on error.
1488  auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
1489  const TargetRegisterClass &RC,
1490  ArgDescriptor &Arg, unsigned UserSGPRs,
1491  unsigned SystemSGPRs) {
1492  // Skip parsing if it's not present.
1493  if (!A)
1494  return false;
1495 
1496  if (A->IsRegister) {
1497  Register Reg;
1498  if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
1499  SourceRange = A->RegisterName.SourceRange;
1500  return true;
1501  }
1502  if (!RC.contains(Reg))
1503  return diagnoseRegisterClass(A->RegisterName);
1505  } else
1506  Arg = ArgDescriptor::createStack(A->StackOffset);
1507  // Check and apply the optional mask.
1508  if (A->Mask)
1509  Arg = ArgDescriptor::createArg(Arg, *A->Mask);
1510 
1511  MFI->NumUserSGPRs += UserSGPRs;
1512  MFI->NumSystemSGPRs += SystemSGPRs;
1513  return false;
1514  };
1515 
  // Parse every kernel-argument descriptor; short-circuits on the first
  // error. The trailing two integers are the user/system SGPR counts each
  // argument contributes.
1516  if (YamlMFI.ArgInfo &&
1517  (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
1518  AMDGPU::SGPR_128RegClass,
1519  MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
1520  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
1521  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
1522  2, 0) ||
1523  parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1524  MFI->ArgInfo.QueuePtr, 2, 0) ||
1525  parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
1526  AMDGPU::SReg_64RegClass,
1527  MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
1528  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
1529  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
1530  2, 0) ||
1531  parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
1532  AMDGPU::SReg_64RegClass,
1533  MFI->ArgInfo.FlatScratchInit, 2, 0) ||
1534  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
1535  AMDGPU::SGPR_32RegClass,
1536  MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
1537  parseAndCheckArgument(YamlMFI.ArgInfo->LDSKernelId,
1538  AMDGPU::SGPR_32RegClass,
1539  MFI->ArgInfo.LDSKernelId, 0, 1) ||
1540  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
1541  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
1542  0, 1) ||
1543  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
1544  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
1545  0, 1) ||
1546  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
1547  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
1548  0, 1) ||
1549  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
1550  AMDGPU::SGPR_32RegClass,
1551  MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
1552  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
1553  AMDGPU::SGPR_32RegClass,
1554  MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
1555  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
1556  AMDGPU::SReg_64RegClass,
1557  MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
1558  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
1559  AMDGPU::SReg_64RegClass,
1560  MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
1561  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
1562  AMDGPU::VGPR_32RegClass,
1563  MFI->ArgInfo.WorkItemIDX, 0, 0) ||
1564  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
1565  AMDGPU::VGPR_32RegClass,
1566  MFI->ArgInfo.WorkItemIDY, 0, 0) ||
1567  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
1568  AMDGPU::VGPR_32RegClass,
1569  MFI->ArgInfo.WorkItemIDZ, 0, 0)))
1570  return true;
1571 
  // Copy the serialized floating-point/IEEE mode settings onto the MFI.
1572  MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
1573  MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
1574  MFI->Mode.FP32InputDenormals = YamlMFI.Mode.FP32InputDenormals;
1575  MFI->Mode.FP32OutputDenormals = YamlMFI.Mode.FP32OutputDenormals;
1578 
1579  return false;
1580 }
llvm::AAResults::addAAResult
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
Definition: AliasAnalysis.h:303
llvm::initializeR600ControlFlowFinalizerPass
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
llvm::TargetPassConfig::addPostRegAlloc
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
Definition: TargetPassConfig.h:420
EnableDCEInRA
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
llvm::TargetMachine::getOptLevel
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:182
llvm::createFastRegisterAllocator
FunctionPass * createFastRegisterAllocator()
FastRegisterAllocation Pass - This pass register allocates as fast as possible.
Definition: RegAllocFast.cpp:1608
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:881
llvm::AMDGPUAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: AMDGPUAliasAnalysis.h:46
llvm::ArgDescriptor::createStack
static constexpr ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
Definition: AMDGPUArgumentUsageInfo.h:49
llvm::AMDGPUFunctionArgInfo::QueuePtr
ArgDescriptor QueuePtr
Definition: AMDGPUArgumentUsageInfo.h:127
EnableLowerModuleLDS
static cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
llvm::initializeR600PacketizerPass
void initializeR600PacketizerPass(PassRegistry &)
LLVMInitializeAMDGPUTarget
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget()
Definition: AMDGPUTargetMachine.cpp:335
llvm::createAMDGPUCtorDtorLoweringPass
ModulePass * createAMDGPUCtorDtorLoweringPass()
RegAllocOptNotSupportedMessage
static const char RegAllocOptNotSupportedMessage[]
Definition: AMDGPUTargetMachine.cpp:1310
llvm::InferAddressSpacesPass
Definition: InferAddressSpaces.h:16
EnableSIModeRegisterPass
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
llvm::PerFunctionMIParsingState::SM
SourceMgr * SM
Definition: MIParser.h:165
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
PassBuilder.h
llvm::createGreedyRegisterAllocator
FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized buil...
Definition: RegAllocGreedy.cpp:186
llvm::Attribute::isValid
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:185
llvm::createAMDGPUAttributorPass
Pass * createAMDGPUAttributorPass()
Definition: AMDGPUAttributor.cpp:794
llvm::PseudoSourceValue::GlobalValueCallEntry
@ GlobalValueCallEntry
Definition: PseudoSourceValue.h:43
llvm::AMDGPUTargetMachine::registerDefaultAliasAnalyses
void registerDefaultAliasAnalyses(AAManager &) override
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Definition: AMDGPUTargetMachine.cpp:579
mustPreserveGV
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
Definition: AMDGPUTargetMachine.cpp:569
llvm::createSeparateConstOffsetFromGEPPass
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Definition: SeparateConstOffsetFromGEP.cpp:498
llvm::OptimizationLevel::O1
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
Definition: OptimizationLevel.h:57
llvm::GCNTargetMachine::convertFuncInfoToYAML
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
Definition: AMDGPUTargetMachine.cpp:1406
llvm::AMDGPULowerModuleLDSPass
Definition: AMDGPU.h:155
llvm::initializeR600ExpandSpecialInstrsPassPass
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
llvm::initializeAMDGPUPostLegalizerCombinerPass
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
llvm::initializeAMDGPUPromoteAllocaPass
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
llvm::createSIMemoryLegalizerPass
FunctionPass * createSIMemoryLegalizerPass()
Definition: SIMemoryLegalizer.cpp:2351
llvm::SILowerSGPRSpillsID
char & SILowerSGPRSpillsID
Definition: SILowerSGPRSpills.cpp:74
llvm::Wave32
@ Wave32
Definition: AMDGPUMCTargetDesc.h:31
llvm::createAMDGPUSetWavePriorityPass
FunctionPass * createAMDGPUSetWavePriorityPass()
llvm::initializeAMDGPUInsertDelayAluPass
void initializeAMDGPUInsertDelayAluPass(PassRegistry &)
llvm::PassBuilder::registerPipelineStartEPCallback
void registerPipelineStartEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:455
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:237
llvm::TargetOptions
Definition: TargetOptions.h:124
llvm::AMDGPUAlwaysInlinePass
Definition: AMDGPU.h:248
llvm::yaml::SIMachineFunctionInfo::ArgInfo
Optional< SIArgumentInfo > ArgInfo
Definition: SIMachineFunctionInfo.h:273
SIMachineFunctionInfo.h
Scalar.h
llvm::ArgDescriptor::createArg
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
Definition: AMDGPUArgumentUsageInfo.h:54
createMinRegScheduler
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:453
llvm::initializeGCNPreRAOptimizationsPass
void initializeGCNPreRAOptimizationsPass(PassRegistry &)
T
llvm::ArgDescriptor
Definition: AMDGPUArgumentUsageInfo.h:23
llvm::Function
Definition: Function.h:60
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:466
llvm::Attribute
Definition: Attributes.h:67
llvm::AMDGPU::SIModeRegisterDefaults::FP32OutputDenormals
bool FP32OutputDenormals
Definition: AMDGPUBaseInfo.h:1294
llvm::PassManager::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
Definition: PassManager.h:544
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::initializeAMDGPUAlwaysInlinePass
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
llvm::yaml::MachineFunctionInfo
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
Definition: MIRYamlMapping.h:676
llvm::PHIEliminationID
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions.
Definition: PHIElimination.cpp:128
llvm::initializeSIInsertHardClausesPass
void initializeSIInsertHardClausesPass(PassRegistry &)
llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
llvm::AMDGPUFunctionArgInfo::LDSKernelId
ArgDescriptor LDSKernelId
Definition: AMDGPUArgumentUsageInfo.h:132
llvm::initializeSIPreAllocateWWMRegsPass
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::initializeAMDGPUPropagateAttributesLatePass
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
InferAddressSpaces.h
llvm::AMDGPU::SIModeRegisterDefaults::IEEE
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
Definition: AMDGPUBaseInfo.h:1285
llvm::createAlwaysInlinerLegacyPass
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
Definition: AlwaysInliner.cpp:175
getGPUOrDefault
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
Definition: AMDGPUTargetMachine.cpp:514
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:150
llvm::AMDGPUPromoteAllocaToVectorPass
Definition: AMDGPU.h:233
llvm::initializeAMDGPULateCodeGenPreparePass
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &)
llvm::createFixIrreduciblePass
FunctionPass * createFixIrreduciblePass()
Definition: FixIrreducible.cpp:104
llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition: MachineScheduler.h:143
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:376
llvm::createVirtRegRewriter
FunctionPass * createVirtRegRewriter(bool ClearVirtRegs=true)
Definition: VirtRegMap.cpp:646
llvm::GCNTargetMachine::GCNTargetMachine
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
Definition: AMDGPUTargetMachine.cpp:801
llvm::Triple::amdgcn
@ amdgcn
Definition: Triple.h:74
GCNSchedStrategy.h
llvm::GCNIterativeScheduler::SCHEDULE_ILP
@ SCHEDULE_ILP
Definition: GCNIterativeScheduler.h:37
llvm::yaml::SIMachineFunctionInfo::VGPRForAGPRCopy
StringValue VGPRForAGPRCopy
Definition: SIMachineFunctionInfo.h:276
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::createAMDGPULateCodeGenPreparePass
FunctionPass * createAMDGPULateCodeGenPreparePass()
Definition: AMDGPULateCodeGenPrepare.cpp:193
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::createSILowerI1CopiesPass
FunctionPass * createSILowerI1CopiesPass()
Definition: SILowerI1Copies.cpp:404
llvm::initializeR600ClauseMergePassPass
void initializeR600ClauseMergePassPass(PassRegistry &)
llvm::GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY
@ SCHEDULE_LEGACYMAXOCCUPANCY
Definition: GCNIterativeScheduler.h:36
llvm::createFlattenCFGPass
FunctionPass * createFlattenCFGPass()
Definition: FlattenCFGPass.cpp:81
llvm::InternalizePass
A pass that internalizes all functions and variables other than those that must be preserved accordin...
Definition: Internalize.h:35
llvm::initializeSIOptimizeExecMaskingPreRAPass
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
llvm::AMDGPUFunctionArgInfo::FlatScratchInit
ArgDescriptor FlatScratchInit
Definition: AMDGPUArgumentUsageInfo.h:130
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::createEarlyCSEPass
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1790
llvm::Wave64
@ Wave64
Definition: AMDGPUMCTargetDesc.h:31
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:127
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:236
GCNVOPDUtils.h
llvm::initializeSILowerI1CopiesPass
void initializeSILowerI1CopiesPass(PassRegistry &)
llvm::AMDGPUMachineFunction::getLDSSize
uint32_t getLDSSize() const
Definition: AMDGPUMachineFunction.h:71
EnableSetWavePriority
static cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)
llvm::SIPreEmitPeepholeID
char & SIPreEmitPeepholeID
llvm::createAMDGPUPostLegalizeCombiner
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPostLegalizerCombiner.cpp:454
llvm::initializeAMDGPUDAGToDAGISelPass
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
llvm::initializeSIPeepholeSDWAPass
void initializeSIPeepholeSDWAPass(PassRegistry &)
llvm::ShadowStackGCLoweringID
char & ShadowStackGCLoweringID
ShadowStackGCLowering - Implements the custom lowering mechanism used by the shadow stack GC.
Definition: ShadowStackGCLowering.cpp:92
llvm::SILowerControlFlowID
char & SILowerControlFlowID
Definition: SILowerControlFlow.cpp:175
llvm::yaml::SIMachineFunctionInfo
Definition: SIMachineFunctionInfo.h:247
llvm::SIOptimizeVGPRLiveRangeID
char & SIOptimizeVGPRLiveRangeID
Definition: SIOptimizeVGPRLiveRange.cpp:618
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:377
InstructionSelect.h
EnableStructurizerWorkarounds
static cl::opt< bool > EnableStructurizerWorkarounds("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden)
llvm::AMDGPUPassConfig
Definition: AMDGPUTargetMachine.h:108
llvm::AMDGPUAAWrapperPass
Legacy wrapper pass to provide the AMDGPUAAResult object.
Definition: AMDGPUAliasAnalysis.h:60
EnableAtomicOptimizations
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
createGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:421
llvm::Optional
Definition: APInt.h:33
llvm::GCNScheduleDAGMILive
Definition: GCNSchedStrategy.h:129
llvm::initializeSIFoldOperandsPass
void initializeSIFoldOperandsPass(PassRegistry &)
IterativeGCNMaxOccupancySchedRegistry
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
llvm::createBarrierNoopPass
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
Definition: BarrierNoopPass.cpp:43
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition: AMDGPUISelDAGToDAG.cpp:114
InternalizeSymbols
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
llvm::initializeGlobalISel
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
Definition: GlobalISel.cpp:17
llvm::AMDGPU::SIModeRegisterDefaults::FP32InputDenormals
bool FP32InputDenormals
If this is set, neither input or output denormals are flushed for most f32 instructions.
Definition: AMDGPUBaseInfo.h:1293
llvm::PassBuilder::registerAnalysisRegistrationCallback
void registerAnalysisRegistrationCallback(const std::function< void(CGSCCAnalysisManager &)> &C)
Register callbacks for analysis registration with this PassBuilder instance.
Definition: PassBuilder.h:515
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
SIMachineScheduler.h
llvm::yaml::SIMode::FP32OutputDenormals
bool FP32OutputDenormals
Definition: SIMachineFunctionInfo.h:211
llvm::createGVNPass
FunctionPass * createGVNPass(bool NoMemDepAnalysis=false)
Create a legacy GVN pass.
Definition: GVN.cpp:3246
llvm::AMDGPUReleaseVGPRsID
char & AMDGPUReleaseVGPRsID
Definition: AMDGPUReleaseVGPRs.cpp:154
llvm::createCGSCCToFunctionPassAdaptor
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:509
llvm::AMDGPUFunctionArgInfo::PrivateSegmentSize
ArgDescriptor PrivateSegmentSize
Definition: AMDGPUArgumentUsageInfo.h:131
llvm::createR600OpenCLImageTypeLoweringPass
ModulePass * createR600OpenCLImageTypeLoweringPass()
Definition: R600OpenCLImageTypeLoweringPass.cpp:372
llvm::AMDGPUUseNativeCallsPass
Definition: AMDGPU.h:69
llvm::AMDGPUFunctionArgInfo::DispatchPtr
ArgDescriptor DispatchPtr
Definition: AMDGPUArgumentUsageInfo.h:126
llvm::PatternMatch::m_c_And
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
Definition: PatternMatch.h:2256
llvm::initializeAMDGPUPropagateAttributesEarlyPass
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
llvm::SIPreAllocateWWMRegsID
char & SIPreAllocateWWMRegsID
Definition: SIPreAllocateWWMRegs.cpp:84
AMDGPUIGroupLP.h
llvm::initializeAMDGPURewriteUndefForPHIPass
void initializeAMDGPURewriteUndefForPHIPass(PassRegistry &)
llvm::initializeAMDGPUPromoteKernelArgumentsPass
void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &)
llvm::SIPostRABundlerID
char & SIPostRABundlerID
Definition: SIPostRABundler.cpp:69
llvm::OptimizationLevel::O0
static const OptimizationLevel O0
Disable as many optimizations as possible.
Definition: OptimizationLevel.h:41
llvm::initializeSIShrinkInstructionsPass
void initializeSIShrinkInstructionsPass(PassRegistry &)
LegacyPassManager.h
llvm::TwoAddressInstructionPassID
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
Definition: TwoAddressInstructionPass.cpp:193
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:140
llvm::GCNTargetMachine::parseMachineFunctionInfo
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target's MachineFunctionInfo from the YAML representation.
Definition: AMDGPUTargetMachine.cpp:1412
llvm::initializeAMDGPUSimplifyLibCallsPass
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:373
Internalize.h
createSIMachineScheduler
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:416
llvm::PatternMatch::m_Deferred
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:790
llvm::MemoryBuffer
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:51
llvm::AMDGPUPassConfig::addGCPasses
bool addGCPasses() override
addGCPasses - Add late codegen passes that analyze code for garbage collection.
Definition: AMDGPUTargetMachine.cpp:1070
F
#define F(x, y, z)
Definition: MD5.cpp:55
EnableInsertDelayAlu
static cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)
llvm::AMDGPUFunctionArgInfo::DispatchID
ArgDescriptor DispatchID
Definition: AMDGPUArgumentUsageInfo.h:129
llvm::PseudoSourceValue::JumpTable
@ JumpTable
Definition: PseudoSourceValue.h:40
llvm::initializeAMDGPULowerIntrinsicsPass
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
llvm::initializeGCNDPPCombinePass
void initializeGCNDPPCombinePass(PassRegistry &)
llvm::AMDGPUUnifyMetadataPass
Definition: AMDGPU.h:274
llvm::AMDGPUFunctionArgInfo::ImplicitArgPtr
ArgDescriptor ImplicitArgPtr
Definition: AMDGPUArgumentUsageInfo.h:143
EnableSDWAPeephole
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
CSEInfo.h
FunctionPassCtor
llvm::SIOptimizeExecMaskingID
char & SIOptimizeExecMaskingID
Definition: SIOptimizeExecMasking.cpp:90
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::initializeAMDGPUUnifyMetadataPass
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
llvm::yaml::SIMachineFunctionInfo::FrameOffsetReg
StringValue FrameOffsetReg
Definition: SIMachineFunctionInfo.h:267
llvm::initializeAMDGPUArgumentUsageInfoPass
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
R600.h
llvm::AMDGPUPassConfig::addIRPasses
void addIRPasses() override
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition: AMDGPUTargetMachine.cpp:944
SISchedRegistry
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
GCNIterativeScheduler.h
llvm::AMDGPUFunctionArgInfo::WorkGroupIDX
ArgDescriptor WorkGroupIDX
Definition: AMDGPUArgumentUsageInfo.h:135
llvm::createInferAddressSpacesPass
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
Definition: InferAddressSpaces.cpp:1309
llvm::initializeSILateBranchLoweringPass
void initializeSILateBranchLoweringPass(PassRegistry &)
llvm::TargetPassConfig::TM
LLVMTargetMachine * TM
Definition: TargetPassConfig.h:122
AMDGPUAliasAnalysis.h
llvm::AMDGPUTargetMachine
Definition: AMDGPUTargetMachine.h:29
llvm::MSP430Attrs::CodeModel
CodeModel
Definition: MSP430Attributes.h:37
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
AlwaysInliner.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:294
llvm::yaml::SIMode::FP32InputDenormals
bool FP32InputDenormals
Definition: SIMachineFunctionInfo.h:210
llvm::PassBuilder::registerParseAACallback
void registerParseAACallback(const std::function< bool(StringRef Name, AAManager &AA)> &C)
Register a callback for parsing an AliasAnalysis Name to populate the given AAManager AA.
Definition: PassBuilder.h:507
GCNMaxILPSchedRegistry
static MachineSchedRegistry GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)
ScalarizeGlobal
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
llvm::createNaryReassociatePass
FunctionPass * createNaryReassociatePass()
Definition: NaryReassociate.cpp:165
llvm::PostRAHazardRecognizerID
char & PostRAHazardRecognizerID
PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Definition: PostRAHazardRecognizer.cpp:61
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:755
llvm::initializeAMDGPULowerKernelArgumentsPass
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
llvm::initializeSIWholeQuadModePass
void initializeSIWholeQuadModePass(PassRegistry &)
llvm::initializeAMDGPUAtomicOptimizerPass
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
llvm::getTheAMDGPUTarget
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
Definition: AMDGPUTargetInfo.cpp:20
llvm::Legalizer
Definition: Legalizer.h:36
llvm::AMDGPUFunctionArgInfo::WorkItemIDX
ArgDescriptor WorkItemIDX
Definition: AMDGPUArgumentUsageInfo.h:150
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
EnableAMDGPUAliasAnalysis
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
EnableLowerKernelArguments
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
EnableLoadStoreVectorizer
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
AMDGPUTargetInfo.h
llvm::createAMDGPULowerModuleLDSPass
ModulePass * createAMDGPULowerModuleLDSPass()
EnableMaxIlpSchedStrategy
static cl::opt< bool > EnableMaxIlpSchedStrategy("amdgpu-enable-max-ilp-scheduling-strategy", cl::desc("Enable scheduling strategy to maximize ILP for a single wave."), cl::Hidden, cl::init(false))
R600TargetMachine.h
llvm::FuncletLayoutID
char & FuncletLayoutID
This pass lays out funclets contiguously.
Definition: FuncletLayout.cpp:39
AMDGPUMacroFusion.h
llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
llvm::createSIInsertWaitcntsPass
FunctionPass * createSIInsertWaitcntsPass()
Definition: SIInsertWaitcnts.cpp:850
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
EnableLDSReplaceWithPointer
static cl::opt< bool > EnableLDSReplaceWithPointer("amdgpu-enable-lds-replace-with-pointer", cl::desc("Enable LDS replace with pointer pass"), cl::init(false), cl::Hidden)
llvm::PassBuilder
This class provides access to building LLVM's passes.
Definition: PassBuilder.h:97
EnableRegReassign
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::SMDiagnostic
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:281
llvm::yaml::SIMode::FP64FP16InputDenormals
bool FP64FP16InputDenormals
Definition: SIMachineFunctionInfo.h:212
llvm::createAMDGPUAnnotateUniformValues
FunctionPass * createAMDGPUAnnotateUniformValues()
Definition: AMDGPUAnnotateUniformValues.cpp:122
llvm::initializeAMDGPUUnifyDivergentExitNodesPass
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
llvm::EarlyIfConverterID
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.
Definition: EarlyIfConversion.cpp:782
useDefaultRegisterAllocator
static FunctionPass * useDefaultRegisterAllocator()
-regalloc=... command line option.
Definition: TargetPassConfig.cpp:1128
llvm::AMDGPUPromoteAllocaPass
Definition: AMDGPU.h:225
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::createAtomicExpandPass
FunctionPass * createAtomicExpandPass()
AtomicExpandPass - At IR level this pass replace atomic instructions with __atomic_* library calls,...
llvm::InstructionSelect
This pass is responsible for selecting generic machine instructions to target-specific instructions.
Definition: InstructionSelect.h:33
llvm::AMDGPUTargetMachine::getNullPointerValue
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
Definition: AMDGPUTargetMachine.cpp:721
llvm::RegisterTargetMachine
RegisterTargetMachine - Helper template for registering a target machine implementation,...
Definition: TargetRegistry.h:1355
llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:325
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::Triple::r600
@ r600
Definition: Triple.h:73
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::createUnifyLoopExitsPass
FunctionPass * createUnifyLoopExitsPass()
Definition: UnifyLoopExits.cpp:61
llvm::GCNIterativeScheduler
Definition: GCNIterativeScheduler.h:29
createTLOF
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:412
llvm::PseudoSourceValue::FixedStack
@ FixedStack
Definition: PseudoSourceValue.h:42
llvm::SourceMgr::getMainFileID
unsigned getMainFileID() const
Definition: SourceMgr.h:132
AMDGPUTargetObjectFile.h
llvm::AMDGPULowerKernelAttributesPass
Definition: AMDGPU.h:115
llvm::AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind
unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override
getAddressSpaceForPseudoSourceKind - Given the kind of memory (e.g.
Definition: AMDGPUTargetMachine.cpp:782
GVN.h
llvm::initializeSIMemoryLegalizerPass
void initializeSIMemoryLegalizerPass(PassRegistry &)
llvm::createLoadStoreVectorizerPass
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
llvm::initializeAMDGPUResourceUsageAnalysisPass
void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &)
EnableDPPCombine
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
llvm::createAMDGPULowerIntrinsicsPass
ModulePass * createAMDGPULowerIntrinsicsPass()
Definition: AMDGPULowerIntrinsics.cpp:175
llvm::AMDGPUPassConfig::addCodeGenPrepare
void addCodeGenPrepare() override
Add pass to prepare the LLVM IR for code generation.
Definition: AMDGPUTargetMachine.cpp:1034
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::StackMapLivenessID
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
Definition: StackMapLivenessAnalysis.cpp:86
llvm::createAMDGPUAnnotateKernelFeaturesPass
Pass * createAMDGPUAnnotateKernelFeaturesPass()
Definition: AMDGPUAnnotateKernelFeatures.cpp:137
llvm::initializeAMDGPUReplaceLDSUseWithPointerPass
void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &)
PatternMatch.h
llvm::AMDGPUTargetMachine::~AMDGPUTargetMachine
~AMDGPUTargetMachine() override
llvm::AMDGPUTargetMachine::getSubtargetImpl
const TargetSubtargetInfo * getSubtargetImpl() const
llvm::createSinkingPass
FunctionPass * createSinkingPass()
Definition: Sink.cpp:277
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:354
Utils.h
llvm::SILoadStoreOptimizerID
char & SILoadStoreOptimizerID
Definition: SILoadStoreOptimizer.cpp:800
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:312
llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition: MachinePassRegistry.h:135
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::createAMDGPUExportClusteringDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()
Definition: AMDGPUExportClustering.cpp:144
GCNILPSchedRegistry
static MachineSchedRegistry GCNILPSchedRegistry("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
llvm::initializeSIOptimizeVGPRLiveRangePass
void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &)
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::TargetMachine::resetTargetOptions
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
Definition: TargetMachine.cpp:53
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1831
llvm::SmallString< 128 >
llvm::SourceMgr::getMemoryBuffer
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:125
llvm::MemoryBuffer::getBufferIdentifier
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:76
llvm::createAMDGPUAAWrapperPass
ImmutablePass * createAMDGPUAAWrapperPass()
Definition: AMDGPUAliasAnalysis.cpp:33
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:372
llvm::createLowerSwitchPass
FunctionPass * createLowerSwitchPass()
Definition: LowerSwitch.cpp:587
llvm::createAMDGPUPrintfRuntimeBinding
ModulePass * createAMDGPUPrintfRuntimeBinding()
Definition: AMDGPUPrintfRuntimeBinding.cpp:93
AMDGPUTargetTransformInfo.h
llvm::AMDGPUPassConfig::addInstSelector
bool addInstSelector() override
addInstSelector - This method should install an instruction selector pass, which converts from LLVM c...
Definition: AMDGPUTargetMachine.cpp:1065
Passes.h
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:210
llvm::VirtRegRewriterID
char & VirtRegRewriterID
VirtRegRewriter pass.
Definition: VirtRegMap.cpp:227
llvm::createAMDGPUAlwaysInlinePass
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Definition: AMDGPUAlwaysInlinePass.cpp:163
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
llvm::initializeSILowerSGPRSpillsPass
void initializeSILowerSGPRSpillsPass(PassRegistry &)
llvm::PseudoSourceValue::ExternalSymbolCallEntry
@ ExternalSymbolCallEntry
Definition: PseudoSourceValue.h:44
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
llvm::PassBuilder::registerPipelineEarlySimplificationEPCallback
void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:464
llvm::AMDGPUTargetMachine::getFeatureString
StringRef getFeatureString(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:561
OptVGPRLiveRange
static cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
llvm::cl::opt
Definition: CommandLine.h:1411
llvm::createLCSSAPass
Pass * createLCSSAPass()
Definition: LCSSA.cpp:491
llvm::createModuleToFunctionPassAdaptor
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1218
llvm::TargetMachine::TargetTriple
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with.
Definition: TargetMachine.h:95
OptExecMaskPreRA
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
llvm::GCLoweringID
char & GCLoweringID
GCLowering Pass - Used by gc.root to perform its default lowering operations.
Definition: GCRootLowering.cpp:85
llvm::yaml::SIMachineFunctionInfo::ScratchRSrcReg
StringValue ScratchRSrcReg
Definition: SIMachineFunctionInfo.h:266
llvm::GlobalValue
Definition: GlobalValue.h:44
GCNMinRegSchedRegistry
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
llvm::AMDGPUUnifyDivergentExitNodesID
char & AMDGPUUnifyDivergentExitNodesID
Definition: AMDGPUUnifyDivergentExitNodes.cpp:79
llvm::initializeGCNCreateVOPDPass
void initializeGCNCreateVOPDPass(PassRegistry &)
llvm::initializeSIInsertWaitcntsPass
void initializeSIInsertWaitcntsPass(PassRegistry &)
llvm::getEffectiveCodeModel
CodeModel::Model getEffectiveCodeModel(std::optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value.
Definition: TargetMachine.h:501
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::initializeSIAnnotateControlFlowPass
void initializeSIAnnotateControlFlowPass(PassRegistry &)
llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3488
llvm::AMDGPUFunctionArgInfo::WorkGroupIDZ
ArgDescriptor WorkGroupIDZ
Definition: AMDGPUArgumentUsageInfo.h:137
llvm::RegisterRegAllocBase< RegisterRegAlloc >::FunctionPassCtor
FunctionPass *(*)() FunctionPassCtor
Definition: RegAllocRegistry.h:32
llvm::EngineKind::JIT
@ JIT
Definition: ExecutionEngine.h:524
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:127
llvm::DetectDeadLanesID
char & DetectDeadLanesID
This pass adds dead/undef flags after analyzing subregister lanes.
Definition: DetectDeadLanes.cpp:125
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition: TargetMachine.h:208
llvm::AMDGPUFunctionArgInfo::PrivateSegmentBuffer
ArgDescriptor PrivateSegmentBuffer
Definition: AMDGPUArgumentUsageInfo.h:125
llvm::SIMachineFunctionInfo::reserveWWMRegister
void reserveWWMRegister(Register Reg)
Definition: SIMachineFunctionInfo.h:526
llvm::createAMDGPUAtomicOptimizerPass
FunctionPass * createAMDGPUAtomicOptimizerPass()
Definition: AMDGPUAtomicOptimizer.cpp:713
llvm::initializeR600VectorRegMergerPass
void initializeR600VectorRegMergerPass(PassRegistry &)
IPO.h
llvm::SIPeepholeSDWAID
char & SIPeepholeSDWAID
Definition: SIPeepholeSDWA.cpp:192
llvm::SIMachineFunctionInfo::initializeBaseYamlFields
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
Definition: SIMachineFunctionInfo.cpp:625
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::GCNTTIImpl
Definition: AMDGPUTargetTransformInfo.h:60
llvm::SIFixVGPRCopiesID
char & SIFixVGPRCopiesID
Definition: SIFixVGPRCopies.cpp:45
llvm::initializeAMDGPURewriteOutArgumentsPass
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
CGSCCPassManager.h
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition: MachineScheduler.h:127
llvm::GCNIterativeScheduler::SCHEDULE_MINREGFORCED
@ SCHEDULE_MINREGFORCED
Definition: GCNIterativeScheduler.h:35
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::AMDGPUSimplifyLibCallsPass
Definition: AMDGPU.h:61
llvm::AMDGPUPassConfig::createMachineScheduler
ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override
Create an instance of ScheduleDAGInstrs to be run within the standard MachineScheduler pass for this ...
Definition: AMDGPUTargetMachine.cpp:1076
llvm::TargetPassConfig::addIRPasses
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition: TargetPassConfig.cpp:854
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:446
llvm::TargetPassConfig::addOptimizedRegAlloc
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
Definition: TargetPassConfig.cpp:1464
llvm::AMDGPUFunctionArgInfo::PrivateSegmentWaveByteOffset
ArgDescriptor PrivateSegmentWaveByteOffset
Definition: AMDGPUArgumentUsageInfo.h:139
llvm::SIFormMemoryClausesID
char & SIFormMemoryClausesID
Definition: SIFormMemoryClauses.cpp:91
llvm::LiveVariablesID
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is life and sets mac...
Definition: LiveVariables.cpp:45
LateCFGStructurize
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
TargetPassConfig.h
llvm::yaml::SIMachineFunctionInfo::WWMReservedRegs
SmallVector< StringValue > WWMReservedRegs
Definition: SIMachineFunctionInfo.h:264
llvm::createExternalAAWrapperPass
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
llvm::SIFixSGPRCopiesID
char & SIFixSGPRCopiesID
Definition: SIFixSGPRCopies.cpp:175
llvm::AMDGPUFunctionArgInfo::WorkGroupIDY
ArgDescriptor WorkGroupIDY
Definition: AMDGPUArgumentUsageInfo.h:136
Localizer.h
EnableVOPD
static cl::opt< bool > EnableVOPD("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)
llvm::PseudoSourceValue::ConstantPool
@ ConstantPool
Definition: PseudoSourceValue.h:41
llvm::MachineCSEID
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:162
llvm::GCNDPPCombineID
char & GCNDPPCombineID
Definition: GCNDPPCombine.cpp:111
llvm::TargetPassConfig::addCodeGenPrepare
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
Definition: TargetPassConfig.cpp:996
llvm::AMDGPU::SIModeRegisterDefaults::DX10Clamp
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
Definition: AMDGPUBaseInfo.h:1289
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SIInsertHardClausesID
char & SIInsertHardClausesID
Definition: SIInsertHardClauses.cpp:273
llvm::AMDGPUPassConfig::addStraightLineScalarOptimizationPasses
void addStraightLineScalarOptimizationPasses()
Definition: AMDGPUTargetMachine.cpp:928
llvm::X86AS::FS
@ FS
Definition: X86.h:200
llvm::AMDGPU::isFlatGlobalAddrSpace
bool isFlatGlobalAddrSpace(unsigned AS)
Definition: AMDGPU.h:419
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16InputDenormals
bool FP64FP16InputDenormals
If this is set, neither input or output denormals are flushed for both f64 and f16/v2f16 instructions...
Definition: AMDGPUBaseInfo.h:1298
llvm::AMDGPUTargetMachine::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const override
If the specified predicate checks whether a generic pointer falls within a specified address space,...
Definition: AMDGPUTargetMachine.cpp:755
llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition: AMDGPUTargetInfo.cpp:25
llvm::AMDGPUTargetMachine::AMDGPUTargetMachine
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
Definition: AMDGPUTargetMachine.cpp:531
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::AMDGPUPassConfig::getAMDGPUTargetMachine
AMDGPUTargetMachine & getAMDGPUTargetMachine() const
Definition: AMDGPUTargetMachine.h:112
llvm::initializeSIOptimizeExecMaskingPass
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
llvm::initializeSIPostRABundlerPass
void initializeSIPostRABundlerPass(PassRegistry &)
llvm::SIScheduleDAGMI
Definition: SIMachineScheduler.h:425
llvm::PassBuilder::registerPipelineParsingCallback
void registerPipelineParsingCallback(const std::function< bool(StringRef Name, CGSCCPassManager &, ArrayRef< PipelineElement >)> &C)
{{@ Register pipeline parsing callbacks with this pass builder instance.
Definition: PassBuilder.h:537
llvm::initializeAMDGPUAAWrapperPassPass
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition: MachineScheduler.h:273
llvm::initializeAMDGPUCodeGenPreparePass
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
llvm::AMDGPUPassConfig::AMDGPUPassConfig
AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
Definition: AMDGPUTargetMachine.cpp:910
llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
llvm::initializeGCNNSAReassignPass
void initializeGCNNSAReassignPass(PassRegistry &)
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::AMDGPUTargetMachine::EnableLowerModuleLDS
static bool EnableLowerModuleLDS
Definition: AMDGPUTargetMachine.h:39
llvm::yaml::StringValue
A wrapper around std::string which contains a source range that's being set during parsing.
Definition: MIRYamlMapping.h:34
llvm::GlobalDCEPass
Pass to remove unused function declarations.
Definition: GlobalDCE.h:36
llvm::PatchableFunctionID
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
Definition: PatchableFunction.cpp:96
AMDGPUExportClustering.h
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::AMDGPUFunctionArgInfo::WorkItemIDZ
ArgDescriptor WorkItemIDZ
Definition: AMDGPUArgumentUsageInfo.h:152
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::getEffectiveRelocModel
static Reloc::Model getEffectiveRelocModel(std::optional< Reloc::Model > RM)
Definition: AVRTargetMachine.cpp:42
llvm::CodeGenOpt::None
@ None
Definition: CodeGen.h:53
llvm::createSIShrinkInstructionsPass
FunctionPass * createSIShrinkInstructionsPass()
llvm::createAMDGPUMachineCFGStructurizerPass
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
Definition: AMDGPUMachineCFGStructurizer.cpp:2851
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:74
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:487
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:558
llvm::TargetPassConfig::addPass
AnalysisID addPass(AnalysisID PassID)
Utilities for targets to add passes to the pass manager.
Definition: TargetPassConfig.cpp:782
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::Constant::removeDeadConstantUsers
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:702
llvm::initializeSIFormMemoryClausesPass
void initializeSIFormMemoryClausesPass(PassRegistry &)
computeDataLayout
static StringRef computeDataLayout(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:498
llvm::Reloc::PIC_
@ PIC_
Definition: CodeGen.h:22
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::initializeAMDGPUExternalAAWrapperPass
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
AMDGPU.h
llvm::yaml::SIMachineFunctionInfo::StackPtrOffsetReg
StringValue StackPtrOffsetReg
Definition: SIMachineFunctionInfo.h:268
SimplifyLibCalls.h
llvm::AMDGPUPassConfig::addPreISel
bool addPreISel() override
Methods with trivial inline returns are convenient points in the common codegen pass pipeline where t...
Definition: AMDGPUTargetMachine.cpp:1059
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
GlobalDCE.h
llvm::yaml::SIMachineFunctionInfo::Mode
SIMode Mode
Definition: SIMachineFunctionInfo.h:274
llvm::getStandardCSEConfigForOpt
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
Definition: CSEInfo.cpp:75
llvm::createAMDGPURegBankCombiner
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
Definition: AMDGPURegBankCombiner.cpp:489
EnablePreRAOptimizations
static cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
IRTranslator.h
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition: TargetMachine.h:127
EarlyInlineAll
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
llvm::createVOPDPairingMutation
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
Definition: GCNVOPDUtils.cpp:181
llvm::once_flag
std::once_flag once_flag
Definition: Threading.h:57
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::AMDGPUFunctionArgInfo::ImplicitBufferPtr
ArgDescriptor ImplicitBufferPtr
Definition: AMDGPUArgumentUsageInfo.h:146
llvm::SIWholeQuadModeID
char & SIWholeQuadModeID
Definition: SIWholeQuadMode.cpp:267
EnableSROA
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
llvm::initializeAMDGPULowerKernelAttributesPass
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
llvm::AMDGPUPassConfig::getCSEConfig
std::unique_ptr< CSEConfigBase > getCSEConfig() const override
Returns the CSEConfig object to use for the current optimization level.
Definition: AMDGPUTargetMachine.cpp:840
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:62
llvm::LLVMTargetMachine::initAsmInfo
void initAsmInfo()
Definition: LLVMTargetMachine.cpp:40
llvm::initializeAMDGPUAnnotateUniformValuesPass
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
llvm::RenameIndependentSubregsID
char & RenameIndependentSubregsID
This pass detects subregister lanes in a virtual register that are used independently of other lanes ...
Definition: RenameIndependentSubregs.cpp:113
llvm::AMDGPUPrintfRuntimeBindingPass
Definition: AMDGPU.h:265
llvm::AMDGPUReplaceLDSUseWithPointerPass
Definition: AMDGPU.h:147
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::createStructurizeCFGPass
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
Definition: StructurizeCFG.cpp:1205
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: AMDGPUBaseInfo.h:1299
llvm::GCNTargetMachine::createPassConfig
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
Definition: AMDGPUTargetMachine.cpp:1397
llvm::PassManager< Module >
llvm::GCNCreateVOPDID
char & GCNCreateVOPDID
Definition: GCNCreateVOPD.cpp:161
llvm::PseudoSourceValue::GOT
@ GOT
Definition: PseudoSourceValue.h:39
llvm::initializeSIFixSGPRCopiesPass
void initializeSIFixSGPRCopiesPass(PassRegistry &)
llvm::PerFunctionMIParsingState
Definition: MIParser.h:162
llvm::AMDGPUFunctionArgInfo::WorkGroupInfo
ArgDescriptor WorkGroupInfo
Definition: AMDGPUArgumentUsageInfo.h:138
llvm::OptimizationLevel::getSpeedupLevel
unsigned getSpeedupLevel() const
Definition: OptimizationLevel.h:121
llvm::initializeAMDGPULowerModuleLDSPass
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
LLVM_READNONE
#define LLVM_READNONE
Definition: Compiler.h:189
createIterativeILPMachineScheduler
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:459
llvm::parseNamedRegisterReference
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)
Definition: MIParser.cpp:3519
llvm::initializeAMDGPUReleaseVGPRsPass
void initializeAMDGPUReleaseVGPRsPass(PassRegistry &)
EnableEarlyIfConversion
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
llvm::initializeSIFixVGPRCopiesPass
void initializeSIFixVGPRCopiesPass(PassRegistry &)
llvm::yaml::SIMode::DX10Clamp
bool DX10Clamp
Definition: SIMachineFunctionInfo.h:209
llvm::initializeAMDGPUPromoteAllocaToVectorPass
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
EnableScalarIRPasses
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
llvm::AMDGPUPromoteKernelArgumentsPass
Definition: AMDGPU.h:106
llvm::initializeSIPreEmitPeepholePass
void initializeSIPreEmitPeepholePass(PassRegistry &)
createIterativeGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:443
llvm::call_once
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:87
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::GCNTargetMachine::getTargetTransformInfo
TargetTransformInfo getTargetTransformInfo(const Function &F) const override
Get a TargetTransformInfo implementation for the target.
Definition: AMDGPUTargetMachine.cpp:832
llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks
void registerPassBuilderCallbacks(PassBuilder &PB) override
Allow the target to modify the pass pipeline.
Definition: AMDGPUTargetMachine.cpp:583
EnablePromoteKernelArguments
static cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
llvm::TargetPassConfig::addMachineSSAOptimization
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.
Definition: TargetPassConfig.cpp:1310
llvm::AMDGPUPassConfig::addEarlyCSEOrGVNPass
void addEarlyCSEOrGVNPass()
Definition: AMDGPUTargetMachine.cpp:921
llvm::createAMDGPUPropagateAttributesEarlyPass
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:400
llvm::AMDGPUPropagateAttributesEarlyPass
Definition: AMDGPU.h:123
llvm::initializeSIModeRegisterPass
void initializeSIModeRegisterPass(PassRegistry &)
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:155
llvm::createLoadClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1572
RegBankSelect.h
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
GCNMaxOccupancySchedRegistry
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
llvm::createAMDGPULowerKernelArgumentsPass
FunctionPass * createAMDGPULowerKernelArgumentsPass()
Definition: AMDGPULowerKernelArguments.cpp:247
llvm::AMDGPUTargetMachine::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
Definition: AMDGPUTargetMachine.cpp:729
llvm::createSIModeRegisterPass
FunctionPass * createSIModeRegisterPass()
Definition: SIModeRegister.cpp:158
llvm::OptimizationLevel
Definition: OptimizationLevel.h:22
llvm::PseudoSourceValue::Stack
@ Stack
Definition: PseudoSourceValue.h:38
llvm::ArgDescriptor::createRegister
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Definition: AMDGPUArgumentUsageInfo.h:44
PassManager.h
llvm::SourceMgr::DK_Error
@ DK_Error
Definition: SourceMgr.h:34
llvm::createAMDGPUReplaceLDSUseWithPointerPass
ModulePass * createAMDGPUReplaceLDSUseWithPointerPass()
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:639
llvm::LLVMTargetMachine
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Definition: TargetMachine.h:408
llvm::TargetPassConfig::disablePass
void disablePass(AnalysisID PassID)
Allow the target to disable a specific standard pass by default.
Definition: TargetPassConfig.h:196
llvm::DeadMachineInstructionElimID
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
Definition: DeadMachineInstructionElim.cpp:56
llvm::PerFunctionMIParsingState::MF
MachineFunction & MF
Definition: MIParser.h:164
llvm::AnalysisManager::registerPass
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:836
llvm::AMDGPUFunctionArgInfo::KernargSegmentPtr
ArgDescriptor KernargSegmentPtr
Definition: AMDGPUArgumentUsageInfo.h:128
llvm::createAMDGPUPromoteAlloca
FunctionPass * createAMDGPUPromoteAlloca()
Definition: AMDGPUPromoteAlloca.cpp:1136
llvm::initializeAMDGPUPrintfRuntimeBindingPass
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
llvm::AAManager::registerFunctionAnalysis
void registerFunctionAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:886
llvm::AMDGPUPassConfig::isPassEnabled
bool isPassEnabled(const cl::opt< bool > &Opt, CodeGenOpt::Level Level=CodeGenOpt::Default) const
Check if a pass is enabled given Opt option.
Definition: AMDGPUTargetMachine.h:133
llvm::BranchRelaxationPassID
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
Definition: BranchRelaxation.cpp:121
llvm::initializeAMDGPUPreLegalizerCombinerPass
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
llvm::createAMDGPUCodeGenPreparePass
FunctionPass * createAMDGPUCodeGenPreparePass()
Definition: AMDGPUCodeGenPrepare.cpp:1467
llvm::RegisterRegAllocBase
RegisterRegAllocBase class - Track the registration of register allocators.
Definition: RegAllocRegistry.h:30
llvm::MachineSchedulerID
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Definition: MachineScheduler.cpp:212
llvm::AMDGPUTargetMachine::EnableFunctionCalls
static bool EnableFunctionCalls
Definition: AMDGPUTargetMachine.h:38
llvm::initializeAMDGPUAttributorPass
void initializeAMDGPUAttributorPass(PassRegistry &)
Legalizer.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::createLICMPass
Pass * createLICMPass()
Definition: LICM.cpp:349
llvm::GCNNSAReassignID
char & GCNNSAReassignID
Definition: GCNNSAReassign.cpp:106
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition: TargetMachine.h:126
llvm::initializeAMDGPUAnnotateKernelFeaturesPass
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
llvm::AMDGPUAS::UNKNOWN_ADDRESS_SPACE
@ UNKNOWN_ADDRESS_SPACE
Definition: AMDGPU.h:412
llvm::PostRASchedulerID
char & PostRASchedulerID
PostRAScheduler - This pass performs post register allocation scheduling.
Definition: PostRASchedulerList.cpp:197
llvm::AMDGPUFunctionArgInfo::WorkItemIDY
ArgDescriptor WorkItemIDY
Definition: AMDGPUArgumentUsageInfo.h:151
llvm::createAMDGPUPreLegalizeCombiner
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPreLegalizerCombiner.cpp:299
llvm::AMDGPUTargetMachine::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const override
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition: AMDGPUTargetMachine.cpp:735
llvm::SMRange
Represents a range in source code.
Definition: SMLoc.h:48
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:375
N
#define N
llvm::createStraightLineStrengthReducePass
FunctionPass * createStraightLineStrengthReducePass()
Definition: StraightLineStrengthReduce.cpp:268
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:325
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:125
llvm::GCNPreRAOptimizationsID
char & GCNPreRAOptimizationsID
Definition: GCNPreRAOptimizations.cpp:79
llvm::initializeSILoadStoreOptimizerPass
void initializeSILoadStoreOptimizerPass(PassRegistry &)
llvm::legacy::PassManagerBase
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
Definition: LegacyPassManager.h:39
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::createStoreClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1579
llvm::IRTranslator
Definition: IRTranslator.h:64
llvm::PassBuilder::registerCGSCCOptimizerLateEPCallback
void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:434
llvm::initializeAMDGPURegBankCombinerPass
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
RegName
#define RegName(no)
llvm::createSIAnnotateControlFlowPass
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
Definition: SIAnnotateControlFlow.cpp:389
Vectorize.h
llvm::yaml::SIMode::IEEE
bool IEEE
Definition: SIMachineFunctionInfo.h:208
llvm::initializeAMDGPUCtorDtorLoweringPass
void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &)
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::SIFoldOperandsID
char & SIFoldOperandsID
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::createBasicRegisterAllocator
FunctionPass * createBasicRegisterAllocator()
BasicRegisterAllocation Pass - This pass implements a degenerate global register allocator using the ...
Definition: RegAllocBasic.cpp:333
llvm::RegBankSelect
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition: RegBankSelect.h:91
createGCNMaxILPMachineScheduler
static ScheduleDAGInstrs * createGCNMaxILPMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:435
PB
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
llvm::MIPatternMatch::m_Not
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
Definition: MIPatternMatch.h:772
llvm::EarlyMachineLICMID
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
Definition: MachineLICM.cpp:297
llvm::AMDGPUTargetMachine::getGPUName
StringRef getGPUName(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:556
llvm::PostMachineSchedulerID
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
Definition: MachineScheduler.cpp:243
llvm::cl::desc
Definition: CommandLine.h:412
llvm::ScheduleDAGMILive
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
Definition: MachineScheduler.h:392
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:120
llvm::AMDGPUInsertDelayAluID
char & AMDGPUInsertDelayAluID
Definition: AMDGPUInsertDelayAlu.cpp:454
llvm::CodeGenOpt::Less
@ Less
Definition: CodeGen.h:54
llvm::TargetPassConfig::addFastRegAlloc
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
Definition: TargetPassConfig.cpp:1454
llvm::AMDGPUPerfHintAnalysisID
char & AMDGPUPerfHintAnalysisID
Definition: AMDGPUPerfHintAnalysis.cpp:58
TargetRegistry.h
llvm::createSROAPass
FunctionPass * createSROAPass()
Definition: SROA.cpp:4864
llvm::AMDGPUPropagateAttributesLatePass
Definition: AMDGPU.h:135
EnableLibCallSimplify
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
InitializePasses.h
llvm::yaml::SIMode::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: SIMachineFunctionInfo.h:213
llvm::SIOptimizeExecMaskingPreRAID
char & SIOptimizeExecMaskingPreRAID
Definition: SIOptimizeExecMaskingPreRA.cpp:75
llvm::createGCNMCRegisterInfo
MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)
Definition: AMDGPUMCTargetDesc.cpp:71
llvm::TargetMachine::MRI
std::unique_ptr< const MCRegisterInfo > MRI
Definition: TargetMachine.h:105
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::createAMDGPURewriteUndefForPHIPass
FunctionPass * createAMDGPURewriteUndefForPHIPass()
Definition: AMDGPURewriteUndefForPHI.cpp:179
llvm::AMDGPUTargetMachine::EnableLateStructurizeCFG
static bool EnableLateStructurizeCFG
Definition: AMDGPUTargetMachine.h:37
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:371
llvm::TargetPassConfig::addILPOpts
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
Definition: TargetPassConfig.h:374
llvm::GCNPostScheduleDAGMILive
Definition: GCNSchedStrategy.h:361
llvm::TargetPassConfig::getOptLevel
CodeGenOpt::Level getOptLevel() const
Definition: TargetPassConfig.cpp:645
AMDGPUTargetMachine.h
llvm::GCNTargetMachine::createDefaultFuncInfoYAML
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
Definition: AMDGPUTargetMachine.cpp:1401
PassName
static const char PassName[]
Definition: X86LowerAMXIntrinsics.cpp:671
llvm::initializeSILowerControlFlowPass
void initializeSILowerControlFlowPass(PassRegistry &)
llvm::SILateBranchLoweringPassID
char & SILateBranchLoweringPassID
Definition: SILateBranchLowering.cpp:66
llvm::createIGroupLPDAGMutation
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation()
Definition: AMDGPUIGroupLP.cpp:1156
RegAllocRegistry.h
MIParser.h
llvm::Localizer
This pass implements the localization mechanism described at the top of this file.
Definition: Localizer.h:43
AMDGPUBaseInfo.h
llvm::createAMDGPUMacroFusionDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
Definition: AMDGPUMacroFusion.cpp:62