1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for SI+ GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUExportClustering.h"
19 #include "AMDGPUMacroFusion.h"
20 #include "AMDGPUTargetObjectFile.h"
22 #include "GCNIterativeScheduler.h"
23 #include "GCNSchedStrategy.h"
24 #include "R600.h"
25 #include "R600TargetMachine.h"
26 #include "SIMachineFunctionInfo.h"
27 #include "SIMachineScheduler.h"
36 #include "llvm/CodeGen/Passes.h"
40 #include "llvm/IR/PassManager.h"
41 #include "llvm/InitializePasses.h"
42 #include "llvm/MC/TargetRegistry.h"
44 #include "llvm/Transforms/IPO.h"
49 #include "llvm/Transforms/Scalar.h"
52 #include "llvm/Transforms/Utils.h"
55 
56 using namespace llvm;
57 
58 namespace {
59 class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
60 public:
61  SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
62  : RegisterRegAllocBase(N, D, C) {}
63 };
64 
65 class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
66 public:
67  VGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
68  : RegisterRegAllocBase(N, D, C) {}
69 };
70 
71 static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
72  const TargetRegisterClass &RC) {
73  return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
74 }
75 
76 static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
77  const TargetRegisterClass &RC) {
78  return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
79 }
80 
81 
82 /// -{sgpr|vgpr}-regalloc=... command line option.
83 static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
84 
85 /// A dummy default pass factory indicates whether the register allocator is
86 /// overridden on the command line.
87 static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
88 static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
89 
90 static SGPRRegisterRegAlloc
91 defaultSGPRRegAlloc("default",
92  "pick SGPR register allocator based on -O option",
93  useDefaultRegisterAllocator);
94 
95 static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,
96  RegisterPassParser<SGPRRegisterRegAlloc>>
97 SGPRRegAlloc("sgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
98  cl::desc("Register allocator to use for SGPRs"));
99 
100 static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
101  RegisterPassParser<VGPRRegisterRegAlloc>>
102 VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
103  cl::desc("Register allocator to use for VGPRs"));
104 
105 
106 static void initializeDefaultSGPRRegisterAllocatorOnce() {
107  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
108 
109  if (!Ctor) {
110  Ctor = SGPRRegAlloc;
111  SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);
112  }
113 }
114 
115 static void initializeDefaultVGPRRegisterAllocatorOnce() {
116  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
117 
118  if (!Ctor) {
119  Ctor = VGPRRegAlloc;
120  VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);
121  }
122 }
123 
124 static FunctionPass *createBasicSGPRRegisterAllocator() {
125  return createBasicRegisterAllocator(onlyAllocateSGPRs);
126 }
127 
128 static FunctionPass *createGreedySGPRRegisterAllocator() {
129  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
130 }
131 
132 static FunctionPass *createFastSGPRRegisterAllocator() {
133  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
134 }
135 
136 static FunctionPass *createBasicVGPRRegisterAllocator() {
137  return createBasicRegisterAllocator(onlyAllocateVGPRs);
138 }
139 
140 static FunctionPass *createGreedyVGPRRegisterAllocator() {
141  return createGreedyRegisterAllocator(onlyAllocateVGPRs);
142 }
143 
144 static FunctionPass *createFastVGPRRegisterAllocator() {
145  return createFastRegisterAllocator(onlyAllocateVGPRs, true);
146 }
147 
148 static SGPRRegisterRegAlloc basicRegAllocSGPR(
149  "basic", "basic register allocator", createBasicSGPRRegisterAllocator);
150 static SGPRRegisterRegAlloc greedyRegAllocSGPR(
151  "greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);
152 
153 static SGPRRegisterRegAlloc fastRegAllocSGPR(
154  "fast", "fast register allocator", createFastSGPRRegisterAllocator);
155 
156 
157 static VGPRRegisterRegAlloc basicRegAllocVGPR(
158  "basic", "basic register allocator", createBasicVGPRRegisterAllocator);
159 static VGPRRegisterRegAlloc greedyRegAllocVGPR(
160  "greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);
161 
162 static VGPRRegisterRegAlloc fastRegAllocVGPR(
163  "fast", "fast register allocator", createFastVGPRRegisterAllocator);
164 }
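// Usage note (illustrative, not upstream code): with the parallel SGPR/VGPR
// allocator registries above, the two allocation phases can be selected
// independently on the llc command line, e.g.:
//   llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 \
//       -sgpr-regalloc=greedy -vgpr-regalloc=fast foo.ll
// Leaving either flag at "default" picks the allocator from the -O level via
// initializeDefault{S,V}GPRRegisterAllocatorOnce() above.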
165 
166 static cl::opt<bool> EnableSROA(
167  "amdgpu-sroa",
168  cl::desc("Run SROA after promote alloca pass"),
169  cl::ReallyHidden,
170  cl::init(true));
171 
172 static cl::opt<bool>
173 EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
174  cl::desc("Run early if-conversion"),
175  cl::init(false));
176 
177 static cl::opt<bool>
178 OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
179  cl::desc("Run pre-RA exec mask optimizations"),
180  cl::init(true));
181 
182 // Option to disable vectorizer for tests.
183 static cl::opt<bool> EnableLoadStoreVectorizer(
184  "amdgpu-load-store-vectorizer",
185  cl::desc("Enable load store vectorizer"),
186  cl::init(true),
187  cl::Hidden);
188 
189 // Option to control global loads scalarization
190 static cl::opt<bool> ScalarizeGlobal(
191  "amdgpu-scalarize-global-loads",
192  cl::desc("Enable global load scalarization"),
193  cl::init(true),
194  cl::Hidden);
195 
196 // Option to run internalize pass.
197 static cl::opt<bool> InternalizeSymbols(
198  "amdgpu-internalize-symbols",
199  cl::desc("Enable elimination of non-kernel functions and unused globals"),
200  cl::init(false),
201  cl::Hidden);
202 
203 // Option to inline all early.
204 static cl::opt<bool> EarlyInlineAll(
205  "amdgpu-early-inline-all",
206  cl::desc("Inline all functions early"),
207  cl::init(false),
208  cl::Hidden);
209 
210 static cl::opt<bool> EnableSDWAPeephole(
211  "amdgpu-sdwa-peephole",
212  cl::desc("Enable SDWA peepholer"),
213  cl::init(true));
214 
215 static cl::opt<bool> EnableDPPCombine(
216  "amdgpu-dpp-combine",
217  cl::desc("Enable DPP combiner"),
218  cl::init(true));
219 
220 // Enable address space based alias analysis
221 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
222  cl::desc("Enable AMDGPU Alias Analysis"),
223  cl::init(true));
224 
225 // Option to run late CFG structurizer
226 static cl::opt<bool, true> LateCFGStructurize(
227  "amdgpu-late-structurize",
228  cl::desc("Enable late CFG structurization"),
229  cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
230  cl::Hidden);
231 
232 static cl::opt<bool, true> EnableFixedFunctionABI(
233  "amdgpu-fixed-function-abi",
234  cl::desc("Enable all implicit function arguments"),
235  cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI),
236  cl::init(false),
237  cl::Hidden);
238 
239 // Enable lib calls simplifications
240 static cl::opt<bool> EnableLibCallSimplify(
241  "amdgpu-simplify-libcall",
242  cl::desc("Enable amdgpu library simplifications"),
243  cl::init(true),
244  cl::Hidden);
245 
246 static cl::opt<bool> EnableLowerKernelArguments(
247  "amdgpu-ir-lower-kernel-arguments",
248  cl::desc("Lower kernel argument loads in IR pass"),
249  cl::init(true),
250  cl::Hidden);
251 
252 static cl::opt<bool> EnableRegReassign(
253  "amdgpu-reassign-regs",
254  cl::desc("Enable register reassign optimizations on gfx10+"),
255  cl::init(true),
256  cl::Hidden);
257 
258 static cl::opt<bool> OptVGPRLiveRange(
259  "amdgpu-opt-vgpr-liverange",
260  cl::desc("Enable VGPR liverange optimizations for if-else structure"),
261  cl::init(true), cl::Hidden);
262 
263 // Enable atomic optimization
264 static cl::opt<bool> EnableAtomicOptimizations(
265  "amdgpu-atomic-optimizations",
266  cl::desc("Enable atomic optimizations"),
267  cl::init(false),
268  cl::Hidden);
269 
270 // Enable Mode register optimization
271 static cl::opt<bool> EnableSIModeRegisterPass(
272  "amdgpu-mode-register",
273  cl::desc("Enable mode register pass"),
274  cl::init(true),
275  cl::Hidden);
276 
277 // Option used in lit tests to prevent dead-coding of the patterns being inspected.
278 static cl::opt<bool>
279 EnableDCEInRA("amdgpu-dce-in-ra",
280  cl::init(true), cl::Hidden,
281  cl::desc("Enable machine DCE inside regalloc"));
282 
283 static cl::opt<bool> EnableScalarIRPasses(
284  "amdgpu-scalar-ir-passes",
285  cl::desc("Enable scalar IR passes"),
286  cl::init(true),
287  cl::Hidden);
288 
289 static cl::opt<bool> EnableStructurizerWorkarounds(
290  "amdgpu-enable-structurizer-workarounds",
291  cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
292  cl::Hidden);
293 
294 static cl::opt<bool> EnableLDSReplaceWithPointer(
295  "amdgpu-enable-lds-replace-with-pointer",
296  cl::desc("Enable LDS replace with pointer pass"), cl::init(false),
297  cl::Hidden);
298 
299 static cl::opt<bool, true> EnableLowerModuleLDS(
300  "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
301  cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
302  cl::Hidden);
303 
304 static cl::opt<bool> EnablePreRAOptimizations(
305  "amdgpu-enable-pre-ra-optimizations",
306  cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
307  cl::Hidden);
308 
309 static cl::opt<bool> EnablePromoteKernelArguments(
310  "amdgpu-enable-promote-kernel-arguments",
311  cl::desc("Enable promotion of flat kernel pointer arguments to global"),
312  cl::Hidden, cl::init(true));
313 
314 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
315  // Register the target
316  RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
317  RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
318 
386 }
387 
388 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
389  return std::make_unique<AMDGPUTargetObjectFile>();
390 }
391 
392 static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
393  return new SIScheduleDAGMI(C);
394 }
395 
396 static ScheduleDAGInstrs *
397 createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
398  ScheduleDAGMILive *DAG =
399  new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
403  return DAG;
404 }
405 
406 static ScheduleDAGInstrs *
407 createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
408  auto DAG = new GCNIterativeScheduler(C,
409  GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
410  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
411  return DAG;
412 }
413 
414 static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
415  return new GCNIterativeScheduler(C,
417 }
418 
419 static ScheduleDAGInstrs *
420 createIterativeILPMachineScheduler(MachineSchedContext *C) {
421  auto DAG = new GCNIterativeScheduler(C,
422  GCNIterativeScheduler::SCHEDULE_ILP);
423  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
424  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
425  return DAG;
426 }
427 
428 static MachineSchedRegistry
429 SISchedRegistry("si", "Run SI's custom scheduler",
430  createSIMachineScheduler);
431 
432 static MachineSchedRegistry
433 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
434  "Run GCN scheduler to maximize occupancy",
435  createGCNMaxOccupancyMachineScheduler);
436 
437 static MachineSchedRegistry
438 IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
439  "Run GCN scheduler to maximize occupancy (experimental)",
440  createIterativeGCNMaxOccupancyMachineScheduler);
441 
442 static MachineSchedRegistry
443 GCNMinRegSchedRegistry("gcn-minreg",
444  "Run GCN iterative scheduler for minimal register usage (experimental)",
445  createMinRegScheduler);
446 
447 static MachineSchedRegistry
448 GCNILPSchedRegistry("gcn-ilp",
449  "Run GCN iterative scheduler for ILP scheduling (experimental)",
450  createIterativeILPMachineScheduler);
451 
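// Usage note (illustrative): schedulers registered through MachineSchedRegistry
// are selectable by name with the generic -misched flag, e.g.
//   llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -misched=gcn-ilp foo.ll
// The subtarget may also pick one itself; see createMachineScheduler below.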
452 static StringRef computeDataLayout(const Triple &TT) {
453  if (TT.getArch() == Triple::r600) {
454  // 32-bit pointers.
455  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
456  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
457  }
458 
459  // 32-bit private, local, and region pointers. 64-bit global, constant and
460  // flat, non-integral buffer fat pointers.
461  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
462  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
463  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
464  "-ni:7";
465 }
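// Reading the string above (informal gloss, not normative): p5:32:32 gives
// address space 5 (private) 32-bit pointers, A5 places allocas in that address
// space, G1 makes address space 1 (global) the default for global variables,
// and ni:7 marks address space 7 (buffer fat pointers) as non-integral.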
466 
468 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
469  if (!GPU.empty())
470  return GPU;
471 
472  // Need to default to a target with flat support for HSA.
473  if (TT.getArch() == Triple::amdgcn)
474  return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
475 
476  return "r600";
477 }
478 
479 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
480  // The AMDGPU toolchain only supports generating shared objects, so we
481  // must always use PIC.
482  return Reloc::PIC_;
483 }
484 
486  StringRef CPU, StringRef FS,
490  CodeGenOpt::Level OptLevel)
493  getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
494  TLOF(createTLOF(getTargetTriple())) {
495  initAsmInfo();
496  if (TT.getArch() == Triple::amdgcn) {
497  if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize64"))
499  else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
501  }
502 }
503 
508 
510 
511 StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
512  Attribute GPUAttr = F.getFnAttribute("target-cpu");
513  return GPUAttr.isValid() ? GPUAttr.getValueAsString() : getTargetCPU();
514 }
515 
516 StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
517  Attribute FSAttr = F.getFnAttribute("target-features");
518 
519  return FSAttr.isValid() ? FSAttr.getValueAsString()
520  : getTargetFeatureString();
521 }
522 
523 /// Predicate for Internalize pass.
524 static bool mustPreserveGV(const GlobalValue &GV) {
525  if (const Function *F = dyn_cast<Function>(&GV))
526  return F->isDeclaration() || F->getName().startswith("__asan_") ||
527  F->getName().startswith("__sanitizer_") ||
528  AMDGPU::isEntryFunctionCC(F->getCallingConv());
529 
531  return !GV.use_empty();
532 }
533 
534 void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
535  Builder.DivergentTarget = true;
536 
537  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
538  bool Internalize = InternalizeSymbols;
539  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
540  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
541  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
542  bool PromoteKernelArguments =
544 
545  if (EnableFunctionCalls) {
546  delete Builder.Inliner;
548  }
549 
550  Builder.addExtension(
552  [Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &,
554  if (AMDGPUAA) {
557  }
560  if (Internalize)
563  if (Internalize)
564  PM.add(createGlobalDCEPass());
565  if (EarlyInline)
567  });
568 
569  Builder.addExtension(
571  [AMDGPUAA, LibCallSimplify, this](const PassManagerBuilder &,
573  if (AMDGPUAA) {
576  }
579  if (LibCallSimplify)
581  });
582 
583  Builder.addExtension(
585  [EnableOpt, PromoteKernelArguments](const PassManagerBuilder &,
587  // Add promote kernel arguments pass to the opt pipeline right before
588  // infer address spaces which is needed to do actual address space
589  // rewriting.
590  if (PromoteKernelArguments)
592 
593  // Add infer address spaces pass to the opt pipeline after inlining
594  // but before SROA to increase SROA opportunities.
596 
597  // This should run after inlining to have any chance of doing anything,
598  // and before other cleanup optimizations.
600 
601  // Promote alloca to vector before SROA and loop unroll. If we manage
602  // to eliminate allocas before unroll we may choose to unroll less.
603  if (EnableOpt)
605  });
606 }
607 
608 void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
609  AAM.registerFunctionAnalysis<AMDGPUAA>();
610 }
611 
616  if (PassName == "amdgpu-propagate-attributes-late") {
618  return true;
619  }
620  if (PassName == "amdgpu-unify-metadata") {
622  return true;
623  }
624  if (PassName == "amdgpu-printf-runtime-binding") {
626  return true;
627  }
628  if (PassName == "amdgpu-always-inline") {
630  return true;
631  }
632  if (PassName == "amdgpu-replace-lds-use-with-pointer") {
634  return true;
635  }
636  if (PassName == "amdgpu-lower-module-lds") {
638  return true;
639  }
640  return false;
641  });
645  if (PassName == "amdgpu-simplifylib") {
647  return true;
648  }
649  if (PassName == "amdgpu-usenative") {
651  return true;
652  }
653  if (PassName == "amdgpu-promote-alloca") {
654  PM.addPass(AMDGPUPromoteAllocaPass(*this));
655  return true;
656  }
657  if (PassName == "amdgpu-promote-alloca-to-vector") {
659  return true;
660  }
661  if (PassName == "amdgpu-lower-kernel-attributes") {
663  return true;
664  }
665  if (PassName == "amdgpu-propagate-attributes-early") {
667  return true;
668  }
669  if (PassName == "amdgpu-promote-kernel-arguments") {
671  return true;
672  }
673  return false;
674  });
675 
677  FAM.registerPass([&] { return AMDGPUAA(); });
678  });
679 
680  PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
681  if (AAName == "amdgpu-aa") {
683  return true;
684  }
685  return false;
686  });
687 
696  });
697 
701  return;
702 
705 
706  if (InternalizeSymbols) {
708  }
710  if (InternalizeSymbols) {
711  PM.addPass(GlobalDCEPass());
712  }
715  });
716 
720  return;
721 
723 
724  // Add promote kernel arguments pass to the opt pipeline right before
725  // infer address spaces which is needed to do actual address space
726  // rewriting.
727  if (Level.getSpeedupLevel() > OptimizationLevel::O1.getSpeedupLevel() &&
730 
731  // Add infer address spaces pass to the opt pipeline after inlining
732  // but before SROA to increase SROA opportunities.
734 
735  // This should run after inlining to have any chance of doing
736  // anything, and before other cleanup optimizations.
738 
739  if (Level != OptimizationLevel::O0) {
740  // Promote alloca to vector before SROA and loop unroll. If we
741  // manage to eliminate allocas before unroll we may choose to unroll
742  // less.
744  }
745 
747  });
748 }
749 
750 int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
751  return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
752  AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
753  AddrSpace == AMDGPUAS::REGION_ADDRESS)
754  ? -1
755  : 0;
756 }
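// Illustrative consequence of the values above: a null pointer in the local
// (LDS), private, or region address spaces is encoded as all-ones
// (0xFFFFFFFF), while flat and global null remain 0. For example, an
// addrspacecast of a flat null pointer to addrspace(3) therefore lowers to the
// 32-bit value -1 rather than 0.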
757 
758 bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
759  unsigned DestAS) const {
760  return AMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
761  AMDGPU::isFlatGlobalAddrSpace(DestAS);
762 }
763 
764 unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
765  const auto *LD = dyn_cast<LoadInst>(V);
766  if (!LD)
767  return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
768 
769  // It must be a generic pointer loaded.
770  assert(V->getType()->isPointerTy() &&
771  V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);
772 
773  const auto *Ptr = LD->getPointerOperand();
774  if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
775  return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
776  // For a generic pointer loaded from constant memory, it can be assumed to be
777  // a global pointer, since constant memory is only populated on the
778  // host side. As implied by the offload programming model, only global
779  // pointers can be referenced on the host side.
780  return AMDGPUAS::GLOBAL_ADDRESS;
781 }
782 
783 //===----------------------------------------------------------------------===//
784 // GCN Target Machine (SI+)
785 //===----------------------------------------------------------------------===//
786 
788  StringRef CPU, StringRef FS,
792  CodeGenOpt::Level OL, bool JIT)
793  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
794 
795 const TargetSubtargetInfo *
796 GCNTargetMachine::getSubtargetImpl(const Function &F) const {
797  StringRef GPU = getGPUName(F);
798  StringRef FS = getFeatureString(F);
799 
800  SmallString<128> SubtargetKey(GPU);
801  SubtargetKey.append(FS);
802 
803  auto &I = SubtargetMap[SubtargetKey];
804  if (!I) {
805  // This needs to be done before we create a new subtarget since any
806  // creation will depend on the TM and the code generation flags on the
807  // function that reside in TargetOptions.
808  resetTargetOptions(F);
809  I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
810  }
811 
812  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
813 
814  return I.get();
815 }
816 
817 TargetTransformInfo
818 GCNTargetMachine::getTargetTransformInfo(const Function &F) {
819  return TargetTransformInfo(GCNTTIImpl(this, F));
820 }
821 
822 //===----------------------------------------------------------------------===//
823 // AMDGPU Pass Setup
824 //===----------------------------------------------------------------------===//
825 
826 std::unique_ptr<CSEConfigBase> llvm::AMDGPUPassConfig::getCSEConfig() const {
827  return getStandardCSEConfigForOpt(TM->getOptLevel());
828 }
829 
830 namespace {
831 
832 class GCNPassConfig final : public AMDGPUPassConfig {
833 public:
834  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
835  : AMDGPUPassConfig(TM, PM) {
836  // It is necessary to know the register usage of the entire call graph. We
837  // allow calls without EnableAMDGPUFunctionCalls if they are marked
838  // noinline, so this is always required.
839  setRequiresCodeGenSCCOrder(true);
840  substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
841  }
842 
843  GCNTargetMachine &getGCNTargetMachine() const {
844  return getTM<GCNTargetMachine>();
845  }
846 
848  createMachineScheduler(MachineSchedContext *C) const override;
849 
851  createPostMachineScheduler(MachineSchedContext *C) const override {
853  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
855  DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
856  return DAG;
857  }
858 
859  bool addPreISel() override;
860  void addMachineSSAOptimization() override;
861  bool addILPOpts() override;
862  bool addInstSelector() override;
863  bool addIRTranslator() override;
864  void addPreLegalizeMachineIR() override;
865  bool addLegalizeMachineIR() override;
866  void addPreRegBankSelect() override;
867  bool addRegBankSelect() override;
868  void addPreGlobalInstructionSelect() override;
869  bool addGlobalInstructionSelect() override;
870  void addFastRegAlloc() override;
871  void addOptimizedRegAlloc() override;
872 
873  FunctionPass *createSGPRAllocPass(bool Optimized);
874  FunctionPass *createVGPRAllocPass(bool Optimized);
875  FunctionPass *createRegAllocPass(bool Optimized) override;
876 
877  bool addRegAssignAndRewriteFast() override;
878  bool addRegAssignAndRewriteOptimized() override;
879 
880  void addPreRegAlloc() override;
881  bool addPreRewrite() override;
882  void addPostRegAlloc() override;
883  void addPreSched2() override;
884  void addPreEmitPass() override;
885 };
886 
887 } // end anonymous namespace
888 
889 AMDGPUPassConfig::AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
890  : TargetPassConfig(TM, PM) {
891  // Exceptions and StackMaps are not supported, so these passes will never do
892  // anything.
895  // Garbage collection is not supported.
898 }
899 
900 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
901  if (getOptLevel() == CodeGenOpt::Aggressive)
902  addPass(createGVNPass());
903  else
904  addPass(createEarlyCSEPass());
905 }
906 
911  // ReassociateGEPs exposes more opportunities for SLSR. See
912  // the example in reassociate-geps-and-slsr.ll.
914  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
915  // EarlyCSE can reuse.
917  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
919  // NaryReassociate on GEPs creates redundant common expressions, so run
920  // EarlyCSE after it.
922 }
923 
926 
927  // There is no reason to run these.
931 
934 
935  // This must occur before inlining, as the inliner will not look through
936  // bitcast calls.
938 
939  // A call to propagate attributes pass in the backend in case opt was not run.
941 
943 
944  // Function calls are not supported, so make sure we inline everything.
947  // We need to add the barrier noop pass, otherwise adding the function
948  // inlining pass will cause all of the PassConfig's passes to be run
949  // one function at a time, which means if we have a module with two
950  // functions, then we will generate code for the first function
951  // without ever running any passes on the second.
953 
954  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
957 
958  // Replace OpenCL enqueued block function pointers with global variables.
960 
961  // Can increase LDS used by kernel so runs before PromoteAlloca
962  if (EnableLowerModuleLDS) {
963  // The pass "amdgpu-replace-lds-use-with-pointer" needs to be run before the
964  // pass "amdgpu-lower-module-lds", and it is only required when the
965  // "amdgpu-lower-module-lds" pass is enabled.
968 
970  }
971 
974 
976 
977  if (TM.getOptLevel() > CodeGenOpt::None) {
979 
980  if (EnableSROA)
984 
988  AAResults &AAR) {
989  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
990  AAR.addAAResult(WrapperPass->getResult());
991  }));
992  }
993 
995  // TODO: May want to move later or split into an early and late one.
997  }
998  }
999 
1001 
1002  // EarlyCSE is not always strong enough to clean up what LSR produces. For
1003  // example, GVN can combine
1004  //
1005  // %0 = add %a, %b
1006  // %1 = add %b, %a
1007  //
1008  // and
1009  //
1010  // %0 = shl nsw %a, 2
1011  // %1 = shl %a, 2
1012  //
1013  // but EarlyCSE can do neither of them.
1016 }
1017 
1018 void AMDGPUPassConfig::addCodeGenPrepare() {
1019  if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
1021 
1022  // FIXME: This pass adds 2 hacky attributes that can be replaced with an
1023  // analysis, and should be removed.
1025  }
1026 
1027  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
1030 
1032 
1035 
1036  // The LowerSwitch pass may introduce unreachable blocks that can cause
1037  // unexpected behavior for subsequent passes. Placing it here works because
1038  // these blocks get cleaned up by UnreachableBlockElim, which is inserted
1039  // next in the pass flow.
1040  addPass(createLowerSwitchPass());
1041 }
1042 
1043 bool AMDGPUPassConfig::addPreISel() {
1044  if (TM->getOptLevel() > CodeGenOpt::None)
1045  addPass(createFlattenCFGPass());
1046  return false;
1047 }
1048 
1051  return false;
1052 }
1053 
1054 bool AMDGPUPassConfig::addGCPasses() {
1055  // Do nothing. GC is not supported.
1056  return false;
1057 }
1058 
1059 llvm::ScheduleDAGInstrs *
1060 AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const {
1061  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
1062  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
1063  return DAG;
1064 }
1065 
1066 //===----------------------------------------------------------------------===//
1067 // GCN Pass Setup
1068 //===----------------------------------------------------------------------===//
1069 
1070 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
1071  MachineSchedContext *C) const {
1072  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1073  if (ST.enableSIScheduler())
1074  return createSIMachineScheduler(C);
1075  return createGCNMaxOccupancyMachineScheduler(C);
1076 }
1077 
1078 bool GCNPassConfig::addPreISel() {
1080 
1081  if (TM->getOptLevel() > CodeGenOpt::None)
1083 
1084  if (isPassEnabled(EnableAtomicOptimizations, CodeGenOpt::Less)) {
1086  }
1087 
1088  if (TM->getOptLevel() > CodeGenOpt::None)
1089  addPass(createSinkingPass());
1090 
1091  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
1092  // regions formed by them.
1094  if (!LateCFGStructurize) {
1096  addPass(createFixIrreduciblePass());
1097  addPass(createUnifyLoopExitsPass());
1098  }
1099  addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions
1100  }
1102  if (!LateCFGStructurize) {
1104  }
1105  addPass(createLCSSAPass());
1106 
1107  if (TM->getOptLevel() > CodeGenOpt::Less)
1108  addPass(&AMDGPUPerfHintAnalysisID);
1109 
1110  return false;
1111 }
1112 
1113 void GCNPassConfig::addMachineSSAOptimization() {
1114  TargetPassConfig::addMachineSSAOptimization();
1115 
1116  // We want to fold operands after PeepholeOptimizer has run (or as part of
1117  // it), because it will eliminate extra copies making it easier to fold the
1118  // real source operand. We want to eliminate dead instructions after, so that
1119  // we see fewer uses of the copies. We then need to clean up the dead
1120  // instructions leftover after the operands are folded as well.
1121  //
1122  // XXX - Can we get away without running DeadMachineInstructionElim again?
1123  addPass(&SIFoldOperandsID);
1124  if (EnableDPPCombine)
1125  addPass(&GCNDPPCombineID);
1126  addPass(&SILoadStoreOptimizerID);
1127  if (isPassEnabled(EnableSDWAPeephole)) {
1128  addPass(&SIPeepholeSDWAID);
1129  addPass(&EarlyMachineLICMID);
1130  addPass(&MachineCSEID);
1131  addPass(&SIFoldOperandsID);
1132  }
1133  addPass(&DeadMachineInstructionElimID);
1134  addPass(createSIShrinkInstructionsPass());
1135 }
1136 
1137 bool GCNPassConfig::addILPOpts() {
1138  if (EnableEarlyIfConversion)
1139  addPass(&EarlyIfConverterID);
1140 
1141  TargetPassConfig::addILPOpts();
1142  return false;
1143 }
1144 
1145 bool GCNPassConfig::addInstSelector() {
1146  AMDGPUPassConfig::addInstSelector();
1147  addPass(&SIFixSGPRCopiesID);
1148  addPass(createSILowerI1CopiesPass());
1149  return false;
1150 }
1151 
1152 bool GCNPassConfig::addIRTranslator() {
1153  addPass(new IRTranslator(getOptLevel()));
1154  return false;
1155 }
1156 
1157 void GCNPassConfig::addPreLegalizeMachineIR() {
1158  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1159  addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
1160  addPass(new Localizer());
1161 }
1162 
1163 bool GCNPassConfig::addLegalizeMachineIR() {
1164  addPass(new Legalizer());
1165  return false;
1166 }
1167 
1168 void GCNPassConfig::addPreRegBankSelect() {
1169  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1170  addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
1171 }
1172 
1173 bool GCNPassConfig::addRegBankSelect() {
1174  addPass(new RegBankSelect());
1175  return false;
1176 }
1177 
1178 void GCNPassConfig::addPreGlobalInstructionSelect() {
1179  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1180  addPass(createAMDGPURegBankCombiner(IsOptNone));
1181 }
1182 
1183 bool GCNPassConfig::addGlobalInstructionSelect() {
1184  addPass(new InstructionSelect(getOptLevel()));
1185  return false;
1186 }
1187 
1188 void GCNPassConfig::addPreRegAlloc() {
1189  if (LateCFGStructurize) {
1191  }
1192 }
1193 
1194 void GCNPassConfig::addFastRegAlloc() {
1195  // FIXME: We have to disable the verifier here because of PHIElimination +
1196  // TwoAddressInstructions disabling it.
1197 
1198  // This must be run immediately after phi elimination and before
1199  // TwoAddressInstructions, otherwise the processing of the tied operand of
1200  // SI_ELSE will introduce a copy of the tied operand source after the else.
1201  insertPass(&PHIEliminationID, &SILowerControlFlowID);
1202 
1205 
1207 }
1208 
1209 void GCNPassConfig::addOptimizedRegAlloc() {
1210  // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
1211  // instructions that cause scheduling barriers.
1212  insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
1214 
1215  if (OptExecMaskPreRA)
1217 
1218  if (isPassEnabled(EnablePreRAOptimizations))
1220 
1221  // This is not an essential optimization and it has a noticeable impact on
1222  // compilation time, so we only enable it from O2.
1223  if (TM->getOptLevel() > CodeGenOpt::Less)
1225 
1226  // FIXME: When an instruction has a killed operand and the instruction is
1227  // inside a bundle, it seems that only the BUNDLE instruction appears in the
1228  // kills list for that register in LiveVariables. This triggers a verifier
1229  // failure; we should fix it and enable the verifier.
1230  if (OptVGPRLiveRange)
1232  // This must be run immediately after phi elimination and before
1233  // TwoAddressInstructions, otherwise the processing of the tied operand of
1234  // SI_ELSE will introduce a copy of the tied operand source after the else.
1235  insertPass(&PHIEliminationID, &SILowerControlFlowID);
1236 
1237  if (EnableDCEInRA)
1239 
1241 }
1242 
1243 bool GCNPassConfig::addPreRewrite() {
1244  if (EnableRegReassign)
1245  addPass(&GCNNSAReassignID);
1246  return true;
1247 }
1248 
1249 FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
1250  // Initialize the global default.
1251  llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
1252  initializeDefaultSGPRRegisterAllocatorOnce);
1253 
1254  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
1255  if (Ctor != useDefaultRegisterAllocator)
1256  return Ctor();
1257 
1258  if (Optimized)
1259  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
1260 
1261  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
1262 }
1263 
1264 FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1265  // Initialize the global default.
1266  llvm::call_once(InitializeDefaultVGPRRegisterAllocatorFlag,
1267  initializeDefaultVGPRRegisterAllocatorOnce);
1268 
1269  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
1270  if (Ctor != useDefaultRegisterAllocator)
1271  return Ctor();
1272 
1273  if (Optimized)
1274  return createGreedyVGPRRegisterAllocator();
1275 
1276  return createFastVGPRRegisterAllocator();
1277 }
1278 
1279 FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
1280  llvm_unreachable("should not be used");
1281 }
1282 
1283 static const char RegAllocOptNotSupportedMessage[] =
1284  "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
1285 
1286 bool GCNPassConfig::addRegAssignAndRewriteFast() {
1287  if (!usingDefaultRegAlloc())
1288  report_fatal_error(RegAllocOptNotSupportedMessage);
1289 
1290  addPass(createSGPRAllocPass(false));
1291 
1292  // Equivalent of PEI for SGPRs.
1293  addPass(&SILowerSGPRSpillsID);
1294 
1295  addPass(createVGPRAllocPass(false));
1296  return true;
1297 }
1298 
1299 bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1300  if (!usingDefaultRegAlloc())
1301  report_fatal_error(RegAllocOptNotSupportedMessage);
1302 
1303  addPass(createSGPRAllocPass(true));
1304 
1305  // Commit allocated register changes. This is mostly necessary because too
1306  // many things rely on the use lists of the physical registers, such as the
1307  // verifier. This is only necessary with allocators which use LiveIntervals,
1308  // since FastRegAlloc does the replacements itself.
1309  addPass(createVirtRegRewriter(false));
1310 
1311  // Equivalent of PEI for SGPRs.
1312  addPass(&SILowerSGPRSpillsID);
1313 
1314  addPass(createVGPRAllocPass(true));
1315 
1316  addPreRewrite();
1317  addPass(&VirtRegRewriterID);
1318 
1319  return true;
1320 }
1321 
1322 void GCNPassConfig::addPostRegAlloc() {
1323  addPass(&SIFixVGPRCopiesID);
1324  if (getOptLevel() > CodeGenOpt::None)
1325  addPass(&SIOptimizeExecMaskingID);
1326  TargetPassConfig::addPostRegAlloc();
1327 }
1328 
1329 void GCNPassConfig::addPreSched2() {
1330  if (TM->getOptLevel() > CodeGenOpt::None)
1331  addPass(createSIShrinkInstructionsPass());
1332  addPass(&SIPostRABundlerID);
1333 }
1334 
1335 void GCNPassConfig::addPreEmitPass() {
1336  addPass(createSIMemoryLegalizerPass());
1337  addPass(createSIInsertWaitcntsPass());
1338 
1339  addPass(createSIModeRegisterPass());
1340 
1341  if (getOptLevel() > CodeGenOpt::None)
1342  addPass(&SIInsertHardClausesID);
1343 
1344  addPass(&SILateBranchLoweringPassID);
1345  if (getOptLevel() > CodeGenOpt::None)
1346  addPass(&SIPreEmitPeepholeID);
1347  // The hazard recognizer that runs as part of the post-ra scheduler does not
1348  // guarantee to be able to handle all hazards correctly. This is because if there
1349  // are multiple scheduling regions in a basic block, the regions are scheduled
1350  // bottom up, so when we begin to schedule a region we don't know what
1351  // instructions were emitted directly before it.
1352  //
1353  // Here we add a stand-alone hazard recognizer pass which can handle all
1354  // cases.
1355  addPass(&PostRAHazardRecognizerID);
1356  addPass(&BranchRelaxationPassID);
1357 }
1358 
1359 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
1360  return new GCNPassConfig(*this, PM);
1361 }
1362 
1363 yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const {
1364  return new yaml::SIMachineFunctionInfo();
1365 }
1366 
1367 yaml::MachineFunctionInfo *
1368 GCNTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
1369  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1370  return new yaml::SIMachineFunctionInfo(
1371  *MFI, *MF.getSubtarget().getRegisterInfo(), MF);
1372 }
1373 
1374 bool GCNTargetMachine::parseMachineFunctionInfo(
1375  const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
1376  SMDiagnostic &Error, SMRange &SourceRange) const {
1377  const yaml::SIMachineFunctionInfo &YamlMFI =
1378  reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
1379  MachineFunction &MF = PFS.MF;
1380  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1381 
1382  if (MFI->initializeBaseYamlFields(YamlMFI, MF, PFS, Error, SourceRange))
1383  return true;
1384 
1385  if (MFI->Occupancy == 0) {
1386  // Fixup the subtarget dependent default value.
1387  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1388  MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
1389  }
1390 
1391  auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
1392  Register TempReg;
1393  if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
1394  SourceRange = RegName.SourceRange;
1395  return true;
1396  }
1397  RegVal = TempReg;
1398 
1399  return false;
1400  };
1401 
1402  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
1403  // Create a diagnostic for the register string literal.
1404  const MemoryBuffer &Buffer =
1405  *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
1406  Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1407  RegName.Value.size(), SourceMgr::DK_Error,
1408  "incorrect register class for field", RegName.Value,
1409  None, None);
1410  SourceRange = RegName.SourceRange;
1411  return true;
1412  };
1413 
1414  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1415  parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1416  parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1417  return true;
1418 
1419  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1420  !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
1421  return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1422  }
1423 
1424  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1425  !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1426  return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1427  }
1428 
1429  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1430  !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1431  return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1432  }
1433 
1434  auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
1435  const TargetRegisterClass &RC,
1436  ArgDescriptor &Arg, unsigned UserSGPRs,
1437  unsigned SystemSGPRs) {
1438  // Skip parsing if it's not present.
1439  if (!A)
1440  return false;
1441 
1442  if (A->IsRegister) {
1443  Register Reg;
1444  if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
1445  SourceRange = A->RegisterName.SourceRange;
1446  return true;
1447  }
1448  if (!RC.contains(Reg))
1449  return diagnoseRegisterClass(A->RegisterName);
1450  Arg = ArgDescriptor::createRegister(Reg);
1451  } else
1452  Arg = ArgDescriptor::createStack(A->StackOffset);
1453  // Check and apply the optional mask.
1454  if (A->Mask)
1455  Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
1456 
1457  MFI->NumUserSGPRs += UserSGPRs;
1458  MFI->NumSystemSGPRs += SystemSGPRs;
1459  return false;
1460  };
1461 
1462  if (YamlMFI.ArgInfo &&
1463  (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
1464  AMDGPU::SGPR_128RegClass,
1465  MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
1466  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
1467  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
1468  2, 0) ||
1469  parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1470  MFI->ArgInfo.QueuePtr, 2, 0) ||
1471  parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
1472  AMDGPU::SReg_64RegClass,
1473  MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
1474  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
1475  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
1476  2, 0) ||
1477  parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
1478  AMDGPU::SReg_64RegClass,
1479  MFI->ArgInfo.FlatScratchInit, 2, 0) ||
1480  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
1481  AMDGPU::SGPR_32RegClass,
1482  MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
1483  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
1484  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
1485  0, 1) ||
1486  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
1487  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
1488  0, 1) ||
1489  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
1490  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
1491  0, 1) ||
1492  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
1493  AMDGPU::SGPR_32RegClass,
1494  MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
1495  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
1496  AMDGPU::SGPR_32RegClass,
1497  MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
1498  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
1499  AMDGPU::SReg_64RegClass,
1500  MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
1501  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
1502  AMDGPU::SReg_64RegClass,
1503  MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
1504  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
1505  AMDGPU::VGPR_32RegClass,
1506  MFI->ArgInfo.WorkItemIDX, 0, 0) ||
1507  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
1508  AMDGPU::VGPR_32RegClass,
1509  MFI->ArgInfo.WorkItemIDY, 0, 0) ||
1510  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
1511  AMDGPU::VGPR_32RegClass,
1512  MFI->ArgInfo.WorkItemIDZ, 0, 0)))
1513  return true;
1514 
1515  MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
1516  MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
1521 
1522  return false;
1523 }
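// Illustrative input for parseMachineFunctionInfo above (a hand-written MIR
// sketch; field spellings follow the yaml::SIMachineFunctionInfo mapping and
// may differ slightly between LLVM versions):
//
//   machineFunctionInfo:
//     scratchRSrcReg:    '$sgpr0_sgpr1_sgpr2_sgpr3'
//     frameOffsetReg:    '$sgpr33'
//     stackPtrOffsetReg: '$sgpr32'
//     argumentInfo:
//       privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
//       kernargSegmentPtr:    { reg: '$sgpr4_sgpr5' }
//       workGroupIDX:         { reg: '$sgpr6' }
//       workItemIDX:          { reg: '$vgpr0' }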
void registerParseAACallback(const std::function< bool(StringRef Name, AAManager &AA)> &C)
Register a callback for parsing an AliasAnalysis Name to populate the given AAManager AA.
Definition: PassBuilder.h:467
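For illustration only, a minimal sketch of how such a parse callback is typically wired up; the pass-builder variable PB, the "my-aa" name, and the MyAA function analysis are hypothetical, not part of this file:

  PB.registerParseAACallback([](StringRef Name, AAManager &AA) {
    if (Name == "my-aa") {                  // hypothetical AA name
      AA.registerFunctionAnalysis<MyAA>();  // hypothetical FunctionAnalysis
      return true;                          // name recognized and handled
    }
    return false;                           // let other callbacks try
  });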
ScalarizeGlobal
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
llvm::createNaryReassociatePass
FunctionPass * createNaryReassociatePass()
Definition: NaryReassociate.cpp:165
llvm::PostRAHazardRecognizerID
char & PostRAHazardRecognizerID
PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Definition: PostRAHazardRecognizer.cpp:64
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:728
llvm::initializeAMDGPULowerKernelArgumentsPass
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
llvm::initializeSIWholeQuadModePass
void initializeSIWholeQuadModePass(PassRegistry &)
llvm::initializeAMDGPUAtomicOptimizerPass
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
llvm::getTheAMDGPUTarget
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
Definition: AMDGPUTargetInfo.cpp:20
llvm::Legalizer
Definition: Legalizer.h:31
llvm::AMDGPUFunctionArgInfo::WorkItemIDX
ArgDescriptor WorkItemIDX
Definition: AMDGPUArgumentUsageInfo.h:148
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
EnableAMDGPUAliasAnalysis
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
EnableLowerKernelArguments
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
EnableLoadStoreVectorizer
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
AMDGPUTargetInfo.h
llvm::createAMDGPULowerModuleLDSPass
ModulePass * createAMDGPULowerModuleLDSPass()
R600TargetMachine.h
llvm::FuncletLayoutID
char & FuncletLayoutID
This pass lays out funclets contiguously.
Definition: FuncletLayout.cpp:39
AMDGPUMacroFusion.h
llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
llvm::createSIInsertWaitcntsPass
FunctionPass * createSIInsertWaitcntsPass()
Definition: SIInsertWaitcnts.cpp:802
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
EnableLDSReplaceWithPointer
static cl::opt< bool > EnableLDSReplaceWithPointer("amdgpu-enable-lds-replace-with-pointer", cl::desc("Enable LDS replace with pointer pass"), cl::init(false), cl::Hidden)
llvm::PassBuilder
This class provides access to building LLVM's passes.
Definition: PassBuilder.h:84
EnableRegReassign
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::SMDiagnostic
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:251
llvm::yaml::SIMode::FP64FP16InputDenormals
bool FP64FP16InputDenormals
Definition: SIMachineFunctionInfo.h:235
llvm::createAMDGPUAnnotateUniformValues
FunctionPass * createAMDGPUAnnotateUniformValues()
Definition: AMDGPUAnnotateUniformValues.cpp:150
llvm::initializeAMDGPUUnifyDivergentExitNodesPass
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
llvm::EarlyIfConverterID
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.
Definition: EarlyIfConversion.cpp:784
useDefaultRegisterAllocator
static FunctionPass * useDefaultRegisterAllocator()
-regalloc=... command line option.
Definition: TargetPassConfig.cpp:1103
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
llvm::AMDGPUPromoteAllocaPass
Definition: AMDGPU.h:228
llvm::createGenericSchedPostRA
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
Definition: MachineScheduler.cpp:3646
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::createModuleToFunctionPassAdaptor
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1225
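A sketch of wrapping a function pass so it can be added to a ModulePassManager; SimplifyCFGPass stands in for any new-PM function pass and the helper name is illustrative:

  #include "llvm/IR/PassManager.h"
  #include "llvm/Transforms/Scalar/SimplifyCFG.h"
  using namespace llvm;

  static void addWrappedPass(ModulePassManager &MPM) {  // illustrative helper
    // The adaptor runs the wrapped function pass on every function in the module.
    MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));
  }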
llvm::createAtomicExpandPass
FunctionPass * createAtomicExpandPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
llvm::InstructionSelect
This pass is responsible for selecting generic machine instructions to target-specific instructions.
Definition: InstructionSelect.h:31
llvm::AMDGPUTargetMachine::getNullPointerValue
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
Definition: AMDGPUTargetMachine.cpp:750
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::RegisterTargetMachine
RegisterTargetMachine - Helper template for registering a target machine implementation,...
Definition: TargetRegistry.h:1275
llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:318
llvm::PassRegistry
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::Triple::r600
@ r600
Definition: Triple.h:71
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::createUnifyLoopExitsPass
FunctionPass * createUnifyLoopExitsPass()
Definition: UnifyLoopExits.cpp:53
llvm::GCNIterativeScheduler
Definition: GCNIterativeScheduler.h:29
createTLOF
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:388
llvm::SourceMgr::getMainFileID
unsigned getMainFileID() const
Definition: SourceMgr.h:129
AMDGPUTargetObjectFile.h
llvm::AMDGPULowerKernelAttributesPass
Definition: AMDGPU.h:118
GVN.h
llvm::createAMDGPUPropagateAttributesLatePass
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:406
llvm::initializeSIMemoryLegalizerPass
void initializeSIMemoryLegalizerPass(PassRegistry &)
llvm::createLoadStoreVectorizerPass
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
llvm::initializeAMDGPUResourceUsageAnalysisPass
void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &)
EnableDPPCombine
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
llvm::createAMDGPULowerIntrinsicsPass
ModulePass * createAMDGPULowerIntrinsicsPass()
Definition: AMDGPULowerIntrinsics.cpp:180
llvm::AMDGPUPassConfig::addCodeGenPrepare
void addCodeGenPrepare() override
Add pass to prepare the LLVM IR for code generation.
Definition: AMDGPUTargetMachine.cpp:1018
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::StackMapLivenessID
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
Definition: StackMapLivenessAnalysis.cpp:86
llvm::createAMDGPUAnnotateKernelFeaturesPass
Pass * createAMDGPUAnnotateKernelFeaturesPass()
Definition: AMDGPUAnnotateKernelFeatures.cpp:137
llvm::initializeAMDGPUReplaceLDSUseWithPointerPass
void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &)
llvm::AMDGPUTargetMachine::~AMDGPUTargetMachine
~AMDGPUTargetMachine() override
llvm::AMDGPUTargetMachine::getSubtargetImpl
const TargetSubtargetInfo * getSubtargetImpl() const
llvm::createSinkingPass
FunctionPass * createSinkingPass()
Definition: Sink.cpp:284
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:310
llvm::createSpeculativeExecutionPass
FunctionPass * createSpeculativeExecutionPass()
Definition: SpeculativeExecution.cpp:325
Utils.h
llvm::SILoadStoreOptimizerID
char & SILoadStoreOptimizerID
Definition: SILoadStoreOptimizer.cpp:575
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:301
llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition: MachinePassRegistry.h:135
llvm::None
const NoneType None
Definition: None.h:23
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::createAMDGPUExportClusteringDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()
Definition: AMDGPUExportClustering.cpp:144
llvm::initializeSIOptimizeVGPRLiveRangePass
void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &)
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::TargetMachine::resetTargetOptions
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
Definition: TargetMachine.cpp:56
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1383
llvm::SmallString< 128 >
llvm::SourceMgr::getMemoryBuffer
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:122
llvm::createFunctionInliningPass
Pass * createFunctionInliningPass()
createFunctionInliningPass - Return a new pass object that uses a heuristic to inline direct function...
Definition: InlineSimple.cpp:97
llvm::legacy::PassManagerBase::add
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
llvm::MemoryBuffer::getBufferIdentifier
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:75
llvm::createAMDGPUAAWrapperPass
ImmutablePass * createAMDGPUAAWrapperPass()
Definition: AMDGPUAliasAnalysis.cpp:33
llvm::PassManagerBuilder
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
Definition: PassManagerBuilder.h:59
llvm::createLowerSwitchPass
FunctionPass * createLowerSwitchPass()
Definition: LowerSwitch.cpp:580
llvm::createAMDGPUPrintfRuntimeBinding
ModulePass * createAMDGPUPrintfRuntimeBinding()
Definition: AMDGPUPrintfRuntimeBinding.cpp:92
AMDGPUTargetTransformInfo.h
llvm::AMDGPUPassConfig::addInstSelector
bool addInstSelector() override
addInstSelector - This method should install an instruction selector pass, which converts from LLVM c...
Definition: AMDGPUTargetMachine.cpp:1049
PB
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
Passes.h
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:193
llvm::VirtRegRewriterID
char & VirtRegRewriterID
VirtRegRewriter pass.
Definition: VirtRegMap.cpp:227
llvm::createAMDGPUAlwaysInlinePass
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Definition: AMDGPUAlwaysInlinePass.cpp:163
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:67
llvm::initializeSILowerSGPRSpillsPass
void initializeSILowerSGPRSpillsPass(PassRegistry &)
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:630
llvm::PassBuilder::registerPipelineEarlySimplificationEPCallback
void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:451
llvm::AMDGPUTargetMachine::getFeatureString
StringRef getFeatureString(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:516
OptVGPRLiveRange
static cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::createLCSSAPass
Pass * createLCSSAPass()
Definition: LCSSA.cpp:484
llvm::TargetMachine::TargetTriple
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with.
Definition: TargetMachine.h:98
OptExecMaskPreRA
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
llvm::GCLoweringID
char & GCLoweringID
GCLowering Pass - Used by gc.root to perform its default lowering operations.
Definition: GCRootLowering.cpp:88
llvm::yaml::SIMachineFunctionInfo::ScratchRSrcReg
StringValue ScratchRSrcReg
Definition: SIMachineFunctionInfo.h:286
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::AMDGPUUnifyDivergentExitNodesID
char & AMDGPUUnifyDivergentExitNodesID
Definition: AMDGPUUnifyDivergentExitNodes.cpp:79
llvm::initializeSIInsertWaitcntsPass
void initializeSIInsertWaitcntsPass(PassRegistry &)
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::initializeSIAnnotateControlFlowPass
void initializeSIAnnotateControlFlowPass(PassRegistry &)
llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3489
llvm::AMDGPUFunctionArgInfo::WorkGroupIDZ
ArgDescriptor WorkGroupIDZ
Definition: AMDGPUArgumentUsageInfo.h:135
llvm::RegisterRegAllocBase< RegisterRegAlloc >::FunctionPassCtor
FunctionPass *(*)() FunctionPassCtor
Definition: RegAllocRegistry.h:32
llvm::EngineKind::JIT
@ JIT
Definition: ExecutionEngine.h:524
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
llvm::DetectDeadLanesID
char & DetectDeadLanesID
This pass adds dead/undef flags after analyzing subregister lanes.
Definition: DetectDeadLanes.cpp:128
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition: TargetMachine.h:211
llvm::AMDGPUFunctionArgInfo::PrivateSegmentBuffer
ArgDescriptor PrivateSegmentBuffer
Definition: AMDGPUArgumentUsageInfo.h:124
llvm::createAMDGPUAtomicOptimizerPass
FunctionPass * createAMDGPUAtomicOptimizerPass()
Definition: AMDGPUAtomicOptimizer.cpp:707
llvm::initializeR600VectorRegMergerPass
void initializeR600VectorRegMergerPass(PassRegistry &)
IPO.h
llvm::SIPeepholeSDWAID
char & SIPeepholeSDWAID
Definition: SIPeepholeSDWA.cpp:191
llvm::SIMachineFunctionInfo::initializeBaseYamlFields
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
Definition: SIMachineFunctionInfo.cpp:601
llvm::createGlobalDCEPass
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::GCNTTIImpl
Definition: AMDGPUTargetTransformInfo.h:59
llvm::SIFixVGPRCopiesID
char & SIFixVGPRCopiesID
Definition: SIFixVGPRCopies.cpp:45
llvm::initializeAMDGPURewriteOutArgumentsPass
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
CGSCCPassManager.h
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition: MachineScheduler.h:120
llvm::GCNIterativeScheduler::SCHEDULE_MINREGFORCED
@ SCHEDULE_MINREGFORCED
Definition: GCNIterativeScheduler.h:35
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::AMDGPUSimplifyLibCallsPass
Definition: AMDGPU.h:60
llvm::AMDGPUPassConfig::createMachineScheduler
ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override
Create an instance of ScheduleDAGInstrs to be run within the standard MachineScheduler pass for this ...
Definition: AMDGPUTargetMachine.cpp:1060
llvm::TargetPassConfig::addIRPasses
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition: TargetPassConfig.cpp:844
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::TargetPassConfig::addOptimizedRegAlloc
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
Definition: TargetPassConfig.cpp:1423
llvm::AMDGPUFunctionArgInfo::PrivateSegmentWaveByteOffset
ArgDescriptor PrivateSegmentWaveByteOffset
Definition: AMDGPUArgumentUsageInfo.h:137
llvm::SIFormMemoryClausesID
char & SIFormMemoryClausesID
Definition: SIFormMemoryClauses.cpp:91
llvm::LiveVariablesID
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is live and sets mac...
Definition: LiveVariables.cpp:45
LateCFGStructurize
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
TargetPassConfig.h
llvm::createExternalAAWrapperPass
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
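Only a sketch of how such a callback is commonly wired in a legacy pass pipeline, assuming PM is a legacy::PassManagerBase and the AMDGPU AA wrapper pass has been added first so its result is available:

  PM.add(createAMDGPUAAWrapperPass());
  PM.add(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
    // If the AMDGPU AA wrapper ran, merge its result into the query set.
    if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
      AAR.addAAResult(WrapperPass->getResult());
  }));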
llvm::SIFixSGPRCopiesID
char & SIFixSGPRCopiesID
Definition: SIFixSGPRCopies.cpp:121
llvm::AMDGPUFunctionArgInfo::WorkGroupIDY
ArgDescriptor WorkGroupIDY
Definition: AMDGPUArgumentUsageInfo.h:134
Localizer.h
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:360
llvm::MachineCSEID
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:153
llvm::GCNDPPCombineID
char & GCNDPPCombineID
Definition: GCNDPPCombine.cpp:111
llvm::TargetPassConfig::addCodeGenPrepare
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
Definition: TargetPassConfig.cpp:973
llvm::AMDGPU::SIModeRegisterDefaults::DX10Clamp
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
Definition: AMDGPUBaseInfo.h:924
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SIInsertHardClausesID
char & SIInsertHardClausesID
Definition: SIInsertHardClauses.cpp:220
GCNMinRegSchedRegistry
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
llvm::AMDGPUPassConfig::addStraightLineScalarOptimizationPasses
void addStraightLineScalarOptimizationPasses()
Definition: AMDGPUTargetMachine.cpp:907
llvm::AMDGPU::isFlatGlobalAddrSpace
bool isFlatGlobalAddrSpace(unsigned AS)
Definition: AMDGPU.h:406
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16InputDenormals
bool FP64FP16InputDenormals
If this is set, neither input nor output denormals are flushed for both f64 and f16/v2f16 instructions...
Definition: AMDGPUBaseInfo.h:933
llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition: AMDGPUTargetInfo.cpp:25
llvm::AMDGPUPassConfig::getAMDGPUTargetMachine
AMDGPUTargetMachine & getAMDGPUTargetMachine() const
Definition: AMDGPUTargetMachine.h:109
llvm::initializeSIOptimizeExecMaskingPass
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
llvm::initializeSIPostRABundlerPass
void initializeSIPostRABundlerPass(PassRegistry &)
llvm::SIScheduleDAGMI
Definition: SIMachineScheduler.h:425
llvm::PassBuilder::registerPipelineParsingCallback
void registerPipelineParsingCallback(const std::function< bool(StringRef Name, CGSCCPassManager &, ArrayRef< PipelineElement >)> &C)
Register pipeline parsing callbacks with this pass builder instance.
Definition: PassBuilder.h:497
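A hedged sketch of registering a textual pipeline name with this CGSCC overload; PB, the "my-cgscc-pass" name, and MyCGSCCPass are hypothetical placeholders:

  PB.registerPipelineParsingCallback(
      [](StringRef Name, CGSCCPassManager &PM,
         ArrayRef<PassBuilder::PipelineElement>) {
        if (Name == "my-cgscc-pass") {  // hypothetical pass name
          PM.addPass(MyCGSCCPass());    // hypothetical CGSCC pass
          return true;
        }
        return false;
      });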
llvm::initializeAMDGPUAAWrapperPassPass
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition: MachineScheduler.h:266
llvm::initializeAMDGPUCodeGenPreparePass
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
llvm::AMDGPUPassConfig::AMDGPUPassConfig
AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
Definition: AMDGPUTargetMachine.cpp:889
llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
llvm::initializeGCNNSAReassignPass
void initializeGCNNSAReassignPass(PassRegistry &)
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::AMDGPUTargetMachine::EnableLowerModuleLDS
static bool EnableLowerModuleLDS
Definition: AMDGPUTargetMachine.h:40
llvm::yaml::StringValue
A wrapper around std::string which contains a source range that's being set during parsing.
Definition: MIRYamlMapping.h:34
llvm::GlobalDCEPass
Pass to remove unused function declarations.
Definition: GlobalDCE.h:29
llvm::PatchableFunctionID
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
Definition: PatchableFunction.cpp:96
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
IterativeGCNMaxOccupancySchedRegistry
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
AMDGPUExportClustering.h
llvm::AMDGPUFunctionArgInfo::WorkItemIDZ
ArgDescriptor WorkItemIDZ
Definition: AMDGPUArgumentUsageInfo.h:150
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::CodeGenOpt::None
@ None
Definition: CodeGen.h:53
llvm::createSIShrinkInstructionsPass
FunctionPass * createSIShrinkInstructionsPass()
llvm::createAMDGPUMachineCFGStructurizerPass
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
Definition: AMDGPUMachineCFGStructurizer.cpp:2886
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:72
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:476
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
llvm::TargetPassConfig::addPass
AnalysisID addPass(AnalysisID PassID)
Utilities for targets to add passes to the pass manager.
Definition: TargetPassConfig.cpp:772
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::Constant::removeDeadConstantUsers
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:752
llvm::initializeSIFormMemoryClausesPass
void initializeSIFormMemoryClausesPass(PassRegistry &)
computeDataLayout
static StringRef computeDataLayout(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:452
llvm::Reloc::PIC_
@ PIC_
Definition: CodeGen.h:22
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:58
llvm::initializeAMDGPUExternalAAWrapperPass
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
AMDGPU.h
llvm::GCNTargetMachine::getTargetTransformInfo
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
Definition: AMDGPUTargetMachine.cpp:818
llvm::yaml::SIMachineFunctionInfo::StackPtrOffsetReg
StringValue StackPtrOffsetReg
Definition: SIMachineFunctionInfo.h:288
SimplifyLibCalls.h
llvm::AMDGPUPassConfig::addPreISel
bool addPreISel() override
Methods with trivial inline returns are convenient points in the common codegen pass pipeline where t...
Definition: AMDGPUTargetMachine.cpp:1043
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
GlobalDCE.h
llvm::yaml::SIMachineFunctionInfo::Mode
SIMode Mode
Definition: SIMachineFunctionInfo.h:291
llvm::getStandardCSEConfigForOpt
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
Definition: CSEInfo.cpp:74
llvm::createAMDGPURegBankCombiner
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
Definition: AMDGPURegBankCombiner.cpp:272
EnablePreRAOptimizations
static cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
IRTranslator.h
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition: TargetMachine.h:130
EarlyInlineAll
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::once_flag
std::once_flag once_flag
Definition: Threading.h:60
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::AMDGPUFunctionArgInfo::ImplicitBufferPtr
ArgDescriptor ImplicitBufferPtr
Definition: AMDGPUArgumentUsageInfo.h:144
llvm::SIWholeQuadModeID
char & SIWholeQuadModeID
Definition: SIWholeQuadMode.cpp:265
llvm::getEffectiveRelocModel
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
Definition: AVRTargetMachine.cpp:40
EnableSROA
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
llvm::initializeAMDGPULowerKernelAttributesPass
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
llvm::getEffectiveCodeModel
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value.
Definition: TargetMachine.h:481
llvm::AMDGPUPassConfig::getCSEConfig
std::unique_ptr< CSEConfigBase > getCSEConfig() const override
Returns the CSEConfig object to use for the current optimization level.
Definition: AMDGPUTargetMachine.cpp:826
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::LLVMTargetMachine::initAsmInfo
void initAsmInfo()
Definition: LLVMTargetMachine.cpp:41
llvm::initializeAMDGPUAnnotateUniformValuesPass
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
llvm::RenameIndependentSubregsID
char & RenameIndependentSubregsID
This pass detects subregister lanes in a virtual register that are used independently of other lanes ...
Definition: RenameIndependentSubregs.cpp:113
llvm::AMDGPUPrintfRuntimeBindingPass
Definition: AMDGPU.h:268
llvm::AMDGPUReplaceLDSUseWithPointerPass
Definition: AMDGPU.h:150
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::createStructurizeCFGPass
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...
Definition: StructurizeCFG.cpp:1086
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: AMDGPUBaseInfo.h:934
llvm::GCNTargetMachine::createPassConfig
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
Definition: AMDGPUTargetMachine.cpp:1359
llvm::PassManager< Module >
llvm::createAMDGPULowerKernelAttributesPass
ModulePass * createAMDGPULowerKernelAttributesPass()
Definition: AMDGPULowerKernelAttributes.cpp:258
llvm::initializeSIFixSGPRCopiesPass
void initializeSIFixSGPRCopiesPass(PassRegistry &)
llvm::PerFunctionMIParsingState
Definition: MIParser.h:162
llvm::AMDGPUFunctionArgInfo::WorkGroupInfo
ArgDescriptor WorkGroupInfo
Definition: AMDGPUArgumentUsageInfo.h:136
llvm::createAMDGPUPromoteAllocaToVector
FunctionPass * createAMDGPUPromoteAllocaToVector()
Definition: AMDGPUPromoteAlloca.cpp:1164
llvm::OptimizationLevel::getSpeedupLevel
unsigned getSpeedupLevel() const
Definition: OptimizationLevel.h:121
llvm::initializeAMDGPULowerModuleLDSPass
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
LLVM_READNONE
#define LLVM_READNONE
Definition: Compiler.h:205
createIterativeILPMachineScheduler
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:420
llvm::parseNamedRegisterReference
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)
Definition: MIParser.cpp:3428
EnableEarlyIfConversion
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
llvm::initializeSIFixVGPRCopiesPass
void initializeSIFixVGPRCopiesPass(PassRegistry &)
llvm::yaml::SIMode::DX10Clamp
bool DX10Clamp
Definition: SIMachineFunctionInfo.h:232
llvm::initializeAMDGPUPromoteAllocaToVectorPass
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
EnableScalarIRPasses
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
llvm::AMDGPUPromoteKernelArgumentsPass
Definition: AMDGPU.h:109
llvm::initializeSIPreEmitPeepholePass
void initializeSIPreEmitPeepholePass(PassRegistry &)
createIterativeGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:407
llvm::call_once
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:90
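A small sketch of the call_once/once_flag idiom from llvm/Support/Threading.h; the flag and helper names are illustrative:

  #include "llvm/Support/Threading.h"

  static llvm::once_flag InitMyTargetFlag;    // illustrative flag

  static void initializeMyTargetOnce() {      // illustrative helper
    llvm::call_once(InitMyTargetFlag, []() {
      // One-time initialization; subsequent calls are no-ops.
    });
  }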
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:596
llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks
void registerPassBuilderCallbacks(PassBuilder &PB) override
Allow the target to modify the pass pipeline with New Pass Manager (similar to adjustPassManager for ...
Definition: AMDGPUTargetMachine.cpp:612
EnablePromoteKernelArguments
static cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
llvm::TargetPassConfig::addMachineSSAOptimization
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.
Definition: TargetPassConfig.cpp:1277
llvm::AMDGPUPassConfig::addEarlyCSEOrGVNPass
void addEarlyCSEOrGVNPass()
Definition: AMDGPUTargetMachine.cpp:900
llvm::createAMDGPUPropagateAttributesEarlyPass
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:401
llvm::AMDGPUPropagateAttributesEarlyPass
Definition: AMDGPU.h:126
llvm::initializeSIModeRegisterPass
void initializeSIModeRegisterPass(PassRegistry &)
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
llvm::createLoadClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1573
RegBankSelect.h
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
GCNMaxOccupancySchedRegistry
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
llvm::createAMDGPULowerKernelArgumentsPass
FunctionPass * createAMDGPULowerKernelArgumentsPass()
Definition: AMDGPULowerKernelArguments.cpp:248
llvm::AMDGPUTargetMachine::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
Definition: AMDGPUTargetMachine.cpp:758
llvm::PassManagerBuilder::EP_ModuleOptimizerEarly
@ EP_ModuleOptimizerEarly
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
Definition: PassManagerBuilder.h:76
llvm::createSIModeRegisterPass
FunctionPass * createSIModeRegisterPass()
Definition: SIModeRegister.cpp:157
llvm::OptimizationLevel
Definition: OptimizationLevel.h:22
llvm::ArgDescriptor::createRegister
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Definition: AMDGPUArgumentUsageInfo.h:44
PassManager.h
llvm::createInternalizePass
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
Definition: Internalize.cpp:315
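A sketch of constructing the pass with a must-preserve predicate; keeping only a symbol named "main" external is purely illustrative, as is the factory name:

  #include "llvm/IR/GlobalValue.h"
  #include "llvm/Transforms/IPO.h"
  using namespace llvm;

  static ModulePass *makeInternalizePass() {  // illustrative factory
    // Everything except "main" becomes internal and may be removed if unused.
    return createInternalizePass(
        [](const GlobalValue &GV) { return GV.getName() == "main"; });
  }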
llvm::SourceMgr::DK_Error
@ DK_Error
Definition: SourceMgr.h:34
llvm::createAMDGPUReplaceLDSUseWithPointerPass
ModulePass * createAMDGPUReplaceLDSUseWithPointerPass()
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:449
llvm::AMDGPUTargetMachine::adjustPassManager
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g. by calling PassManagerBuilder::addExtension.
Definition: AMDGPUTargetMachine.cpp:534
llvm::LLVMTargetMachine
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Definition: TargetMachine.h:393
llvm::TargetPassConfig::disablePass
void disablePass(AnalysisID PassID)
Allow the target to disable a specific standard pass by default.
Definition: TargetPassConfig.h:196
llvm::DeadMachineInstructionElimID
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
Definition: DeadMachineInstructionElim.cpp:57
llvm::PerFunctionMIParsingState::MF
MachineFunction & MF
Definition: MIParser.h:164
GCNILPSchedRegistry
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
llvm::AnalysisManager::registerPass
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:845
llvm::AMDGPUFunctionArgInfo::KernargSegmentPtr
ArgDescriptor KernargSegmentPtr
Definition: AMDGPUArgumentUsageInfo.h:127
llvm::createAMDGPUPromoteAlloca
FunctionPass * createAMDGPUPromoteAlloca()
Definition: AMDGPUPromoteAlloca.cpp:1160
llvm::initializeAMDGPUPrintfRuntimeBindingPass
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
llvm::AAManager::registerFunctionAnalysis
void registerFunctionAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:1293
llvm::AMDGPUPassConfig::isPassEnabled
bool isPassEnabled(const cl::opt< bool > &Opt, CodeGenOpt::Level Level=CodeGenOpt::Default) const
Check if a pass is enabled given the Opt option.
Definition: AMDGPUTargetMachine.h:130
llvm::BranchRelaxationPassID
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
Definition: BranchRelaxation.cpp:119
llvm::initializeAMDGPUPreLegalizerCombinerPass
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
llvm::createAMDGPUCodeGenPreparePass
FunctionPass * createAMDGPUCodeGenPreparePass()
Definition: AMDGPUCodeGenPrepare.cpp:1449
llvm::createAMDGPUPromoteKernelArgumentsPass
FunctionPass * createAMDGPUPromoteKernelArgumentsPass()
Definition: AMDGPUPromoteKernelArguments.cpp:180
llvm::RegisterRegAllocBase
RegisterRegAllocBase class - Track the registration of register allocators.
Definition: RegAllocRegistry.h:30
llvm::MachineSchedulerID
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Definition: MachineScheduler.cpp:210
llvm::AMDGPUTargetMachine::EnableFunctionCalls
static bool EnableFunctionCalls
Definition: AMDGPUTargetMachine.h:38
llvm::initializeAMDGPUAttributorPass
void initializeAMDGPUAttributorPass(PassRegistry &)
Legalizer.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::createLICMPass
Pass * createLICMPass()
Definition: LICM.cpp:327
llvm::createAMDGPUFixFunctionBitcastsPass
ModulePass * createAMDGPUFixFunctionBitcastsPass()
llvm::GCNNSAReassignID
char & GCNNSAReassignID
Definition: GCNNSAReassign.cpp:104
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition: TargetMachine.h:129
llvm::PassManagerBuilder::EP_EarlyAsPossible
@ EP_EarlyAsPossible
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations,...
Definition: PassManagerBuilder.h:72
llvm::initializeAMDGPUAnnotateKernelFeaturesPass
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
llvm::PostRASchedulerID
char & PostRASchedulerID
PostRAScheduler - This pass performs post register allocation scheduling.
Definition: PostRASchedulerList.cpp:199
llvm::AMDGPUFunctionArgInfo::WorkItemIDY
ArgDescriptor WorkItemIDY
Definition: AMDGPUArgumentUsageInfo.h:149
llvm::createAMDGPUPreLegalizeCombiner
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPreLegalizerCombiner.cpp:296
llvm::AMDGPUTargetMachine::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const override
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition: AMDGPUTargetMachine.cpp:764
llvm::SMRange
Represents a range in source code.
Definition: SMLoc.h:48
N
#define N
llvm::createStraightLineStrengthReducePass
FunctionPass * createStraightLineStrengthReducePass()
Definition: StraightLineStrengthReduce.cpp:269
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::initializeAMDGPUFixFunctionBitcastsPass
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &)
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:128
llvm::GCNPreRAOptimizationsID
char & GCNPreRAOptimizationsID
Definition: GCNPreRAOptimizations.cpp:79
llvm::initializeSILoadStoreOptimizerPass
void initializeSILoadStoreOptimizerPass(PassRegistry &)
llvm::legacy::PassManagerBase
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
Definition: LegacyPassManager.h:39
llvm::IRTranslator
Definition: IRTranslator.h:63
llvm::PassBuilder::registerCGSCCOptimizerLateEPCallback
void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:421
llvm::initializeAMDGPURegBankCombinerPass
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
RegName
#define RegName(no)
llvm::createSIAnnotateControlFlowPass
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
Definition: SIAnnotateControlFlow.cpp:375
Vectorize.h
llvm::yaml::SIMode::IEEE
bool IEEE
Definition: SIMachineFunctionInfo.h:231
llvm::initializeAMDGPUCtorDtorLoweringPass
void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &)
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::SIFoldOperandsID
char & SIFoldOperandsID
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::createBasicRegisterAllocator
FunctionPass * createBasicRegisterAllocator()
BasicRegisterAllocation Pass - This pass implements a degenerate global register allocator using the ...
Definition: RegAllocBasic.cpp:337
llvm::RegBankSelect
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition: RegBankSelect.h:91
llvm::EarlyMachineLICMID
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
Definition: MachineLICM.cpp:298
llvm::AMDGPUTargetMachine::getGPUName
StringRef getGPUName(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:511
llvm::PostMachineSchedulerID
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
Definition: MachineScheduler.cpp:241
llvm::cl::desc
Definition: CommandLine.h:412
llvm::ScheduleDAGMILive
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
Definition: MachineScheduler.h:385
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:119
llvm::PassManagerBuilder::EP_CGSCCOptimizerLate
@ EP_CGSCCOptimizerLate
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
Definition: PassManagerBuilder.h:117
llvm::CodeGenOpt::Less
@ Less
Definition: CodeGen.h:54
llvm::AMDGPUTargetMachine::AMDGPUTargetMachine
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
Definition: AMDGPUTargetMachine.cpp:485
llvm::TargetPassConfig::addFastRegAlloc
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
Definition: TargetPassConfig.cpp:1413
llvm::AMDGPUPerfHintAnalysisID
char & AMDGPUPerfHintAnalysisID
Definition: AMDGPUPerfHintAnalysis.cpp:58
TargetRegistry.h
llvm::createSROAPass
FunctionPass * createSROAPass()
Definition: SROA.cpp:4802
llvm::AMDGPUPropagateAttributesLatePass
Definition: AMDGPU.h:138
EnableLibCallSimplify
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
InitializePasses.h
llvm::yaml::SIMode::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: SIMachineFunctionInfo.h:236
llvm::SIOptimizeExecMaskingPreRAID
char & SIOptimizeExecMaskingPreRAID
Definition: SIOptimizeExecMaskingPreRA.cpp:75
llvm::createGCNMCRegisterInfo
MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)
Definition: AMDGPUMCTargetDesc.cpp:68
llvm::TargetMachine::MRI
std::unique_ptr< const MCRegisterInfo > MRI
Definition: TargetMachine.h:108
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
EnableAMDGPUFixedFunctionABIOpt
static cl::opt< bool, true > EnableAMDGPUFixedFunctionABIOpt("amdgpu-fixed-function-abi", cl::desc("Enable all implicit function arguments"), cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI), cl::init(false), cl::Hidden)
llvm::AMDGPUTargetMachine::EnableLateStructurizeCFG
static bool EnableLateStructurizeCFG
Definition: AMDGPUTargetMachine.h:37
llvm::TargetPassConfig::addILPOpts
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
Definition: TargetPassConfig.h:371
llvm::TargetPassConfig::getOptLevel
CodeGenOpt::Level getOptLevel() const
Definition: TargetPassConfig.cpp:635
AMDGPUTargetMachine.h
llvm::GCNTargetMachine::createDefaultFuncInfoYAML
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
Definition: AMDGPUTargetMachine.cpp:1363
PassName
static const char PassName[]
Definition: X86LowerAMXIntrinsics.cpp:669
llvm::initializeSILowerControlFlowPass
void initializeSILowerControlFlowPass(PassRegistry &)
llvm::SILateBranchLoweringPassID
char & SILateBranchLoweringPassID
Definition: SILateBranchLowering.cpp:66
RegAllocRegistry.h
llvm::createAMDGPUSimplifyLibCallsPass
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
Definition: AMDGPULibCalls.cpp:1698
MIParser.h
llvm::Localizer
This pass implements the localization mechanism described at the top of this file.
Definition: Localizer.h:40
llvm::createAMDGPUMacroFusionDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
Definition: AMDGPUMacroFusion.cpp:62
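The note above refers to registering the mutation when the scheduler DAG is created; a minimal sketch of that wiring as a standalone factory (not the actual AMDGPUPassConfig code):

  #include "AMDGPUMacroFusion.h"
  #include "llvm/CodeGen/MachineScheduler.h"
  using namespace llvm;

  static ScheduleDAGInstrs *
  createSchedulerWithMacroFusion(MachineSchedContext *C) {  // illustrative factory
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createAMDGPUMacroFusionDAGMutation());  // per the note above
    return DAG;
  }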