LLVM 13.0.0git
AMDGPUTargetMachine.cpp
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for R600 and SI GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUExportClustering.h"
19 #include "AMDGPUMacroFusion.h"
20 #include "AMDGPUTargetObjectFile.h"
22 #include "GCNIterativeScheduler.h"
23 #include "GCNSchedStrategy.h"
24 #include "R600MachineScheduler.h"
25 #include "SIMachineFunctionInfo.h"
26 #include "SIMachineScheduler.h"
37 #include "llvm/IR/PassManager.h"
38 #include "llvm/InitializePasses.h"
41 #include "llvm/Transforms/IPO.h"
46 #include "llvm/Transforms/Scalar.h"
49 #include "llvm/Transforms/Utils.h"
52 
53 using namespace llvm;
54 
static cl::opt<bool> EnableR600StructurizeCFG(
  "r600-ir-structurize",
  cl::desc("Use StructurizeCFG IR pass"),
  cl::init(true));

static cl::opt<bool> EnableSROA(
  "amdgpu-sroa",
  cl::desc("Run SROA after promote alloca pass"),
  cl::ReallyHidden,
  cl::init(true));

static cl::opt<bool>
EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
                        cl::desc("Run early if-conversion"),
                        cl::init(false));

static cl::opt<bool>
OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
                 cl::desc("Run pre-RA exec mask optimizations"),
                 cl::init(true));

static cl::opt<bool> EnableR600IfConvert(
  "r600-if-convert",
  cl::desc("Use if conversion pass"),
  cl::ReallyHidden,
  cl::init(true));

// Option to disable vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
  "amdgpu-load-store-vectorizer",
  cl::desc("Enable load store vectorizer"),
  cl::init(true),
  cl::Hidden);

// Option to control global loads scalarization
static cl::opt<bool> ScalarizeGlobal(
  "amdgpu-scalarize-global-loads",
  cl::desc("Enable global load scalarization"),
  cl::init(true),
  cl::Hidden);

// Option to run internalize pass.
static cl::opt<bool> InternalizeSymbols(
  "amdgpu-internalize-symbols",
  cl::desc("Enable elimination of non-kernel functions and unused globals"),
  cl::init(false),
  cl::Hidden);

// Option to inline all early.
static cl::opt<bool> EarlyInlineAll(
  "amdgpu-early-inline-all",
  cl::desc("Inline all functions early"),
  cl::init(false),
  cl::Hidden);

static cl::opt<bool> EnableSDWAPeephole(
  "amdgpu-sdwa-peephole",
  cl::desc("Enable SDWA peepholer"),
  cl::init(true));

static cl::opt<bool> EnableDPPCombine(
  "amdgpu-dpp-combine",
  cl::desc("Enable DPP combiner"),
  cl::init(true));

// Enable address space based alias analysis
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
  cl::desc("Enable AMDGPU Alias Analysis"),
  cl::init(true));

// Option to run late CFG structurizer
static cl::opt<bool, true> LateCFGStructurize(
  "amdgpu-late-structurize",
  cl::desc("Enable late CFG structurization"),
  cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
  cl::Hidden);

static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt(
  "amdgpu-function-calls",
  cl::desc("Enable AMDGPU function call support"),
  cl::location(AMDGPUTargetMachine::EnableFunctionCalls),
  cl::init(true),
  cl::Hidden);

static cl::opt<bool, true> EnableAMDGPUFixedFunctionABIOpt(
  "amdgpu-fixed-function-abi",
  cl::desc("Enable all implicit function arguments"),
  cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI),
  cl::init(false),
  cl::Hidden);

// Enable lib calls simplifications
static cl::opt<bool> EnableLibCallSimplify(
  "amdgpu-simplify-libcall",
  cl::desc("Enable amdgpu library simplifications"),
  cl::init(true),
  cl::Hidden);

static cl::opt<bool> EnableLowerKernelArguments(
  "amdgpu-ir-lower-kernel-arguments",
  cl::desc("Lower kernel argument loads in IR pass"),
  cl::init(true),
  cl::Hidden);

static cl::opt<bool> EnableRegReassign(
  "amdgpu-reassign-regs",
  cl::desc("Enable register reassign optimizations on gfx10+"),
  cl::init(true),
  cl::Hidden);

// Enable atomic optimization
static cl::opt<bool> EnableAtomicOptimizations(
  "amdgpu-atomic-optimizations",
  cl::desc("Enable atomic optimizations"),
  cl::init(false),
  cl::Hidden);

// Enable Mode register optimization
static cl::opt<bool> EnableSIModeRegisterPass(
  "amdgpu-mode-register",
  cl::desc("Enable mode register pass"),
  cl::init(true),
  cl::Hidden);

// Option is used in lit tests to prevent deadcoding of patterns inspected.
static cl::opt<bool>
EnableDCEInRA("amdgpu-dce-in-ra",
  cl::init(true), cl::Hidden,
  cl::desc("Enable machine DCE inside regalloc"));

static cl::opt<bool> EnableScalarIRPasses(
  "amdgpu-scalar-ir-passes",
  cl::desc("Enable scalar IR passes"),
  cl::init(true),
  cl::Hidden);

static cl::opt<bool> EnableStructurizerWorkarounds(
  "amdgpu-enable-structurizer-workarounds",
  cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
  cl::Hidden);

static cl::opt<bool, true> EnableLowerModuleLDS(
  "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
  cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
  cl::Hidden);
200 
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
  // Register the target
  RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
  RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
267 }
268 
269 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
270  return std::make_unique<AMDGPUTargetObjectFile>();
271 }
272 
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
}

static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
  return new SIScheduleDAGMI(C);
}

static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
      new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
  DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
  return DAG;
}

static ScheduleDAGInstrs *
createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  auto DAG = new GCNIterativeScheduler(C,
      GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}

static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
  return new GCNIterativeScheduler(C,
      GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
}

static ScheduleDAGInstrs *
createIterativeILPMachineScheduler(MachineSchedContext *C) {
  auto DAG = new GCNIterativeScheduler(C,
      GCNIterativeScheduler::SCHEDULE_ILP);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
  return DAG;
}

static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
                  createR600MachineScheduler);

static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
                createSIMachineScheduler);

static MachineSchedRegistry
GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
                             "Run GCN scheduler to maximize occupancy",
                             createGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry
IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
    "Run GCN scheduler to maximize occupancy (experimental)",
    createIterativeGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry
GCNMinRegSchedRegistry("gcn-minreg",
    "Run GCN iterative scheduler for minimal register usage (experimental)",
    createMinRegScheduler);

static MachineSchedRegistry
GCNILPSchedRegistry("gcn-ilp",
    "Run GCN iterative scheduler for ILP scheduling (experimental)",
    createIterativeILPMachineScheduler);
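// Informal summary of the data layout strings below (not normative): "p5:32:32"
// gives 32-bit private/scratch pointers, "A5" makes address space 5 the alloca
// address space, "G1" makes address space 1 the default globals address space,
// and "ni:7" marks address space 7 (buffer fat pointers) as non-integral.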
341 static StringRef computeDataLayout(const Triple &TT) {
342  if (TT.getArch() == Triple::r600) {
343  // 32-bit pointers.
344  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
345  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
346  }
347 
348  // 32-bit private, local, and region pointers. 64-bit global, constant and
349  // flat, non-integral buffer fat pointers.
350  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
351  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
352  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
353  "-ni:7";
354 }
355 
LLVM_READNONE
static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
358  if (!GPU.empty())
359  return GPU;
360 
361  // Need to default to a target with flat support for HSA.
362  if (TT.getArch() == Triple::amdgcn)
363  return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
364 
365  return "r600";
366 }
367 
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
  // The AMDGPU toolchain only supports generating shared objects, so we
  // must always use PIC.
  return Reloc::PIC_;
}
373 
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         TargetOptions Options,
                                         Optional<Reloc::Model> RM,
                                         Optional<CodeModel::Model> CM,
                                         CodeGenOpt::Level OptLevel)
    : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
                        FS, Options, getEffectiveRelocModel(RM),
                        getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
      TLOF(createTLOF(getTargetTriple())) {
  initAsmInfo();
  if (TT.getArch() == Triple::amdgcn) {
    if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize64"))
      MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave64));
    else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
      MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave32));
  }
}

bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
bool AMDGPUTargetMachine::EnableFixedFunctionABI = false;
bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;

AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;

StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
  Attribute GPUAttr = F.getFnAttribute("target-cpu");
  return GPUAttr.isValid() ? GPUAttr.getValueAsString() : getTargetCPU();
}

StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
  Attribute FSAttr = F.getFnAttribute("target-features");

  return FSAttr.isValid() ? FSAttr.getValueAsString()
                          : getTargetFeatureString();
}
411 
412 /// Predicate for Internalize pass.
413 static bool mustPreserveGV(const GlobalValue &GV) {
414  if (const Function *F = dyn_cast<Function>(&GV))
415  return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
416 
417  return !GV.use_empty();
418 }
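// Everything this predicate does not preserve is given internal linkage when
// -amdgpu-internalize-symbols is enabled, so the GlobalDCE run scheduled right
// after internalization can strip unused non-kernel device code.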
419 
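// adjustPassManager hooks the AMDGPU-specific IR passes into the legacy
// PassManagerBuilder extension points; registerPassBuilderCallbacks below is
// the new pass manager counterpart.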
void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
  Builder.DivergentTarget = true;
422 
423  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
424  bool Internalize = InternalizeSymbols;
425  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
426  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
427  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
428 
429  if (EnableFunctionCalls) {
430  delete Builder.Inliner;
    Builder.Inliner = createFunctionInliningPass();
  }
433 
  Builder.addExtension(
      PassManagerBuilder::EP_ModuleOptimizerEarly,
      [Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &,
                                                 legacy::PassManagerBase &PM) {
        if (AMDGPUAA) {
          PM.add(createAMDGPUAAWrapperPass());
          PM.add(createAMDGPUExternalAAWrapperPass());
        }
        PM.add(createAMDGPUUnifyMetadataPass());
        PM.add(createAMDGPUPrintfRuntimeBinding());
        if (Internalize)
          PM.add(createInternalizePass(mustPreserveGV));
        PM.add(createAMDGPUPropagateAttributesLatePass(this));
        if (Internalize)
          PM.add(createGlobalDCEPass());
        if (EarlyInline)
          PM.add(createAMDGPUAlwaysInlinePass(false));
  });
452 
  Builder.addExtension(
      PassManagerBuilder::EP_EarlyAsPossible,
      [AMDGPUAA, LibCallSimplify, this](const PassManagerBuilder &,
                                        legacy::PassManagerBase &PM) {
        if (AMDGPUAA) {
          PM.add(createAMDGPUAAWrapperPass());
          PM.add(createAMDGPUExternalAAWrapperPass());
        }
        PM.add(createAMDGPUUseNativeCallsPass());
        if (LibCallSimplify)
          PM.add(createAMDGPUSimplifyLibCallsPass(this));
  });
466 
  Builder.addExtension(
      PassManagerBuilder::EP_CGSCCOptimizerLate,
      [EnableOpt](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
        // Add infer address spaces pass to the opt pipeline after inlining
        // but before SROA to increase SROA opportunities.
        PM.add(createInferAddressSpacesPass());

        // This should run after inlining to have any chance of doing anything,
        // and before other cleanup optimizations.
        PM.add(createAMDGPUAnnotateKernelFeaturesPass());

        // Promote alloca to vector before SROA and loop unroll. If we manage
        // to eliminate allocas before unroll we may choose to unroll less.
        if (EnableOpt)
          PM.add(createAMDGPUPromoteAllocaToVector());
  });
483 }

void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
  AAM.registerFunctionAnalysis<AMDGPUAA>();
}

void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
                                                       bool DebugPassManager) {
  PB.registerPipelineParsingCallback(
      [this](StringRef PassName, ModulePassManager &PM,
             ArrayRef<PassBuilder::PipelineElement>) {
        if (PassName == "amdgpu-propagate-attributes-late") {
          PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
          return true;
        }
        if (PassName == "amdgpu-unify-metadata") {
          PM.addPass(AMDGPUUnifyMetadataPass());
          return true;
        }
        if (PassName == "amdgpu-printf-runtime-binding") {
          PM.addPass(AMDGPUPrintfRuntimeBindingPass());
          return true;
        }
        if (PassName == "amdgpu-always-inline") {
          PM.addPass(AMDGPUAlwaysInlinePass());
          return true;
        }
        if (PassName == "amdgpu-lower-module-lds") {
          PM.addPass(AMDGPULowerModuleLDSPass());
          return true;
        }
        return false;
      });
  PB.registerPipelineParsingCallback(
      [this](StringRef PassName, FunctionPassManager &PM,
             ArrayRef<PassBuilder::PipelineElement>) {
        if (PassName == "amdgpu-simplifylib") {
          PM.addPass(AMDGPUSimplifyLibCallsPass(*this));
          return true;
        }
        if (PassName == "amdgpu-usenative") {
          PM.addPass(AMDGPUUseNativeCallsPass());
          return true;
        }
        if (PassName == "amdgpu-promote-alloca") {
          PM.addPass(AMDGPUPromoteAllocaPass(*this));
          return true;
        }
        if (PassName == "amdgpu-promote-alloca-to-vector") {
          PM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
          return true;
        }
        if (PassName == "amdgpu-lower-kernel-attributes") {
          PM.addPass(AMDGPULowerKernelAttributesPass());
          return true;
        }
        if (PassName == "amdgpu-propagate-attributes-early") {
          PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
          return true;
        }
        return false;
      });
545 
  PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &FAM) {
    FAM.registerPass([&] { return AMDGPUAA(); });
  });
549 
550  PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
    if (AAName == "amdgpu-aa") {
      AAM.registerFunctionAnalysis<AMDGPUAA>();
      return true;
554  }
555  return false;
556  });
557 
  PB.registerPipelineStartEPCallback([this, DebugPassManager](
                                         ModulePassManager &PM,
                                         PassBuilder::OptimizationLevel Level) {
    FunctionPassManager FPM(DebugPassManager);
    FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
    FPM.addPass(AMDGPUUseNativeCallsPass());
    if (EnableLibCallSimplify && Level != PassBuilder::OptimizationLevel::O0)
      FPM.addPass(AMDGPUSimplifyLibCallsPass(*this));
    PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  });

  PB.registerPipelineEarlySimplificationEPCallback(
      [this](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
        if (Level == PassBuilder::OptimizationLevel::O0)
          return;

        PM.addPass(AMDGPUUnifyMetadataPass());
        PM.addPass(AMDGPUPrintfRuntimeBindingPass());

        if (InternalizeSymbols) {
          // Global variables may have dead uses which need to be removed.
          // Otherwise these useless global variables will not get internalized.
          PM.addPass(GlobalDCEPass());
          PM.addPass(InternalizePass(mustPreserveGV));
        }
        PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
        if (InternalizeSymbols) {
          PM.addPass(GlobalDCEPass());
        }
        if (EarlyInlineAll && !EnableFunctionCalls)
          PM.addPass(AMDGPUAlwaysInlinePass());
      });
590 
  PB.registerCGSCCOptimizerLateEPCallback(
      [this, DebugPassManager](CGSCCPassManager &PM,
                               PassBuilder::OptimizationLevel Level) {
        if (Level == PassBuilder::OptimizationLevel::O0)
          return;

        FunctionPassManager FPM(DebugPassManager);

        // Add infer address spaces pass to the opt pipeline after inlining
        // but before SROA to increase SROA opportunities.
        FPM.addPass(InferAddressSpacesPass());

        // This should run after inlining to have any chance of doing
        // anything, and before other cleanup optimizations.
        FPM.addPass(AMDGPULowerKernelAttributesPass());

        if (Level != PassBuilder::OptimizationLevel::O0) {
          // Promote alloca to vector before SROA and loop unroll. If we
          // manage to eliminate allocas before unroll we may choose to unroll
          // less.
          FPM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
        }

        PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
      });
616 }
617 
618 //===----------------------------------------------------------------------===//
619 // R600 Target Machine (R600 -> Cayman)
620 //===----------------------------------------------------------------------===//
621 
R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef CPU, StringRef FS,
                                     TargetOptions Options,
                                     Optional<Reloc::Model> RM,
                                     Optional<CodeModel::Model> CM,
                                     CodeGenOpt::Level OL, bool JIT)
    : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
  setRequiresStructuredCFG(true);
630 
631  // Override the default since calls aren't supported for r600.
632  if (EnableFunctionCalls &&
633  EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
634  EnableFunctionCalls = false;
635 }
636 
const R600Subtarget *
R600TargetMachine::getSubtargetImpl(const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);
641 
642  SmallString<128> SubtargetKey(GPU);
643  SubtargetKey.append(FS);
644 
645  auto &I = SubtargetMap[SubtargetKey];
646  if (!I) {
647  // This needs to be done before we create a new subtarget since any
648  // creation will depend on the TM and the code generation flags on the
649  // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
652  }
653 
654  return I.get();
655 }
656 
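// In the local, private and region address spaces the value 0 is a valid
// address, so an all-ones value (-1) is used to represent the null pointer
// there; all other address spaces keep the conventional 0 for null.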
657 int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
658  return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
659  AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
660  AddrSpace == AMDGPUAS::REGION_ADDRESS)
661  ? -1
662  : 0;
663 }
664 
bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
                                              unsigned DestAS) const {
  return AMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
         AMDGPU::isFlatGlobalAddrSpace(DestAS);
}
670 
unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
  const auto *LD = dyn_cast<LoadInst>(V);
  if (!LD)
    return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

  // It must be a generic pointer loaded.
  assert(V->getType()->isPointerTy() &&
         V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);

  const auto *Ptr = LD->getPointerOperand();
  if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
  // For a generic pointer loaded from the constant memory, it could be assumed
  // as a global pointer since the constant memory is only populated on the
  // host side. As implied by the offload programming model, only global
  // pointers could be referenced on the host side.
  return AMDGPUAS::GLOBAL_ADDRESS;
}

TargetTransformInfo
R600TargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(R600TTIImpl(this, F));
}
694 
695 //===----------------------------------------------------------------------===//
696 // GCN Target Machine (SI+)
697 //===----------------------------------------------------------------------===//
698 
GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   TargetOptions Options,
                                   Optional<Reloc::Model> RM,
                                   Optional<CodeModel::Model> CM,
                                   CodeGenOpt::Level OL, bool JIT)
    : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}

const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

711  SmallString<128> SubtargetKey(GPU);
712  SubtargetKey.append(FS);
713 
714  auto &I = SubtargetMap[SubtargetKey];
715  if (!I) {
716  // This needs to be done before we create a new subtarget since any
717  // creation will depend on the TM and the code generation flags on the
718  // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
721  }
722 
723  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
724 
725  return I.get();
726 }

TargetTransformInfo
GCNTargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(GCNTTIImpl(this, F));
}
732 
733 //===----------------------------------------------------------------------===//
734 // AMDGPU Pass Setup
735 //===----------------------------------------------------------------------===//
736 
737 namespace {
738 
739 class AMDGPUPassConfig : public TargetPassConfig {
740 public:
741  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
742  : TargetPassConfig(TM, PM) {
743  // Exceptions and StackMaps are not supported, so these passes will never do
744  // anything.
745  disablePass(&StackMapLivenessID);
746  disablePass(&FuncletLayoutID);
747  }
748 
749  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
750  return getTM<AMDGPUTargetMachine>();
751  }
752 
  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    return DAG;
  }
759 
760  void addEarlyCSEOrGVNPass();
761  void addStraightLineScalarOptimizationPasses();
762  void addIRPasses() override;
763  void addCodeGenPrepare() override;
764  bool addPreISel() override;
765  bool addInstSelector() override;
766  bool addGCPasses() override;
767 
768  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
769 };
770 
771 std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig() const {
772  return getStandardCSEConfigForOpt(TM->getOptLevel());
773 }
774 
775 class R600PassConfig final : public AMDGPUPassConfig {
776 public:
777  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
778  : AMDGPUPassConfig(TM, PM) {}
779 
780  ScheduleDAGInstrs *createMachineScheduler(
781  MachineSchedContext *C) const override {
    return createR600MachineScheduler(C);
  }
784 
785  bool addPreISel() override;
786  bool addInstSelector() override;
787  void addPreRegAlloc() override;
788  void addPreSched2() override;
789  void addPreEmitPass() override;
790 };
791 
792 class GCNPassConfig final : public AMDGPUPassConfig {
793 public:
794  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
795  : AMDGPUPassConfig(TM, PM) {
796  // It is necessary to know the register usage of the entire call graph. We
797  // allow calls without EnableAMDGPUFunctionCalls if they are marked
798  // noinline, so this is always required.
799  setRequiresCodeGenSCCOrder(true);
800  }
801 
802  GCNTargetMachine &getGCNTargetMachine() const {
803  return getTM<GCNTargetMachine>();
804  }
805 
  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override;
808 
809  bool addPreISel() override;
810  void addMachineSSAOptimization() override;
811  bool addILPOpts() override;
812  bool addInstSelector() override;
813  bool addIRTranslator() override;
814  void addPreLegalizeMachineIR() override;
815  bool addLegalizeMachineIR() override;
816  void addPreRegBankSelect() override;
817  bool addRegBankSelect() override;
818  void addPreGlobalInstructionSelect() override;
819  bool addGlobalInstructionSelect() override;
820  void addFastRegAlloc() override;
821  void addOptimizedRegAlloc() override;
822  void addPreRegAlloc() override;
823  bool addPreRewrite() override;
824  void addPostRegAlloc() override;
825  void addPreSched2() override;
826  void addPreEmitPass() override;
827 };
828 
829 } // end anonymous namespace
830 
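// At CodeGenOpt::Aggressive the backend pays for a full GVN run here; at
// lower optimization levels the cheaper EarlyCSE is used instead.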
831 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
832  if (getOptLevel() == CodeGenOpt::Aggressive)
833  addPass(createGVNPass());
834  else
835  addPass(createEarlyCSEPass());
836 }
837 
838 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createLICMPass());
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
  // EarlyCSE can reuse.
847  addEarlyCSEOrGVNPass();
848  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
849  addPass(createNaryReassociatePass());
850  // NaryReassociate on GEPs creates redundant common expressions, so run
851  // EarlyCSE after it.
852  addPass(createEarlyCSEPass());
853 }
854 
855 void AMDGPUPassConfig::addIRPasses() {
856  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
857 
858  // There is no reason to run these.
859  disablePass(&StackMapLivenessID);
860  disablePass(&FuncletLayoutID);
861  disablePass(&PatchableFunctionID);

  addPass(createAMDGPUPrintfRuntimeBinding());

  // This must occur before inlining, as the inliner will not look through
  // bitcast calls.
  addPass(createAMDGPUFixFunctionBitcastsPass());

  // A call to propagate attributes pass in the backend in case opt was not run.
  addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));

  addPass(createAtomicExpandPass());

  addPass(createAMDGPULowerIntrinsicsPass());

877  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerLegacyPass());
880  // We need to add the barrier noop pass, otherwise adding the function
881  // inlining pass will cause all of the PassConfigs passes to be run
  // one function at a time, which means if we have a module with two
883  // functions, then we will generate code for the first function
884  // without ever running any passes on the second.
885  addPass(createBarrierNoopPass());
886 
887  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  if (TM.getTargetTriple().getArch() == Triple::r600)
    addPass(createR600OpenCLImageTypeLoweringPass());

  // Replace OpenCL enqueued block function pointers with global variables.
  addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());

  // Can increase LDS used by kernel so runs before PromoteAlloca
  if (EnableLowerModuleLDS)
    addPass(createAMDGPULowerModuleLDSPass());

898  if (TM.getOptLevel() > CodeGenOpt::None) {
899  addPass(createInferAddressSpacesPass());
900  addPass(createAMDGPUPromoteAlloca());
901 
902  if (EnableSROA)
903  addPass(createSROAPass());

    if (EnableScalarIRPasses)
      addStraightLineScalarOptimizationPasses();

    if (EnableAMDGPUAliasAnalysis) {
      addPass(createAMDGPUAAWrapperPass());
      addPass(createExternalAAWrapperPass([](Pass &P, Function &,
                                             AAResults &AAR) {
912  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
913  AAR.addAAResult(WrapperPass->getResult());
914  }));
915  }
916  }
917 
918  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
    // TODO: May want to move later or split into an early and late one.
    addPass(createAMDGPUCodeGenPreparePass());
  }

  TargetPassConfig::addIRPasses();

925  // EarlyCSE is not always strong enough to clean up what LSR produces. For
926  // example, GVN can combine
927  //
928  // %0 = add %a, %b
929  // %1 = add %b, %a
930  //
931  // and
932  //
933  // %0 = shl nsw %a, 2
934  // %1 = shl %a, 2
935  //
936  // but EarlyCSE can do neither of them.
937  if (getOptLevel() != CodeGenOpt::None && EnableScalarIRPasses)
938  addEarlyCSEOrGVNPass();
939 }
940 
941 void AMDGPUPassConfig::addCodeGenPrepare() {
  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
    addPass(createAMDGPULateCodeGenPreparePass());

  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
      EnableLowerKernelArguments)
    addPass(createAMDGPULowerKernelArgumentsPass());

  addPass(&AMDGPUPerfHintAnalysisID);

  TargetPassConfig::addCodeGenPrepare();

  if (EnableLoadStoreVectorizer)
    addPass(createLoadStoreVectorizerPass());

  // The LowerSwitch pass may introduce unreachable blocks that can cause
  // unexpected behavior for subsequent passes. Placing it here lets those
  // blocks be cleaned up by the UnreachableBlockElim pass inserted next in
  // the pass flow.
960  addPass(createLowerSwitchPass());
961 }
962 
963 bool AMDGPUPassConfig::addPreISel() {
964  addPass(createFlattenCFGPass());
965  return false;
966 }
967 
968 bool AMDGPUPassConfig::addInstSelector() {
969  // Defer the verifier until FinalizeISel.
970  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()), false);
971  return false;
972 }
973 
974 bool AMDGPUPassConfig::addGCPasses() {
975  // Do nothing. GC is not supported.
976  return false;
977 }
978 
979 //===----------------------------------------------------------------------===//
980 // R600 Pass Setup
981 //===----------------------------------------------------------------------===//
982 
983 bool R600PassConfig::addPreISel() {
984  AMDGPUPassConfig::addPreISel();
985 
  if (EnableR600StructurizeCFG)
    addPass(createStructurizeCFGPass());
988  return false;
989 }
990 
991 bool R600PassConfig::addInstSelector() {
992  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
993  return false;
994 }
995 
996 void R600PassConfig::addPreRegAlloc() {
997  addPass(createR600VectorRegMerger());
998 }
999 
1000 void R600PassConfig::addPreSched2() {
1001  addPass(createR600EmitClauseMarkers(), false);
1002  if (EnableR600IfConvert)
1003  addPass(&IfConverterID, false);
1004  addPass(createR600ClauseMergePass(), false);
1005 }
1006 
1007 void R600PassConfig::addPreEmitPass() {
1008  addPass(createAMDGPUCFGStructurizerPass(), false);
1009  addPass(createR600ExpandSpecialInstrsPass(), false);
1010  addPass(&FinalizeMachineBundlesID, false);
1011  addPass(createR600Packetizer(), false);
1012  addPass(createR600ControlFlowFinalizer(), false);
1013 }
1014 
TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new R600PassConfig(*this, PM);
1017 }
1018 
1019 //===----------------------------------------------------------------------===//
1020 // GCN Pass Setup
1021 //===----------------------------------------------------------------------===//
1022 
1023 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
1024  MachineSchedContext *C) const {
1025  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1026  if (ST.enableSIScheduler())
1027  return createSIMachineScheduler(C);
  return createGCNMaxOccupancyMachineScheduler(C);
}
1030 
1031 bool GCNPassConfig::addPreISel() {
1032  AMDGPUPassConfig::addPreISel();

  if (EnableAtomicOptimizations) {
    addPass(createAMDGPUAtomicOptimizerPass());
  }
1038 
1039  // FIXME: We need to run a pass to propagate the attributes when calls are
1040  // supported.
1041 
1042  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
1043  // regions formed by them.
  addPass(&AMDGPUUnifyDivergentExitNodesID);
  if (!LateCFGStructurize) {
    if (EnableStructurizerWorkarounds) {
      addPass(createFixIrreduciblePass());
1048  addPass(createUnifyLoopExitsPass());
1049  }
1050  addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions
1051  }
1052  addPass(createSinkingPass());
  addPass(createAMDGPUAnnotateUniformValues());
  if (!LateCFGStructurize) {
    addPass(createSIAnnotateControlFlowPass());
  }
1057  addPass(createLCSSAPass());
1058 
1059  return false;
1060 }
1061 
1062 void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();

1065  // We want to fold operands after PeepholeOptimizer has run (or as part of
1066  // it), because it will eliminate extra copies making it easier to fold the
1067  // real source operand. We want to eliminate dead instructions after, so that
1068  // we see fewer uses of the copies. We then need to clean up the dead
1069  // instructions leftover after the operands are folded as well.
1070  //
1071  // XXX - Can we get away without running DeadMachineInstructionElim again?
1072  addPass(&SIFoldOperandsID);
1073  if (EnableDPPCombine)
1074  addPass(&GCNDPPCombineID);
1075  addPass(&DeadMachineInstructionElimID);
1076  addPass(&SILoadStoreOptimizerID);
1077  if (EnableSDWAPeephole) {
1078  addPass(&SIPeepholeSDWAID);
1079  addPass(&EarlyMachineLICMID);
1080  addPass(&MachineCSEID);
1081  addPass(&SIFoldOperandsID);
1082  addPass(&DeadMachineInstructionElimID);
1083  }
1084  addPass(createSIShrinkInstructionsPass());
1085 }
1086 
1087 bool GCNPassConfig::addILPOpts() {
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterID);

  TargetPassConfig::addILPOpts();
  return false;
1093 }
1094 
1095 bool GCNPassConfig::addInstSelector() {
1096  AMDGPUPassConfig::addInstSelector();
1097  addPass(&SIFixSGPRCopiesID);
1098  addPass(createSILowerI1CopiesPass());
1099  return false;
1100 }
1101 
1102 bool GCNPassConfig::addIRTranslator() {
1103  addPass(new IRTranslator(getOptLevel()));
1104  return false;
1105 }
1106 
1107 void GCNPassConfig::addPreLegalizeMachineIR() {
1108  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1109  addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
1110  addPass(new Localizer());
1111 }
1112 
1113 bool GCNPassConfig::addLegalizeMachineIR() {
1114  addPass(new Legalizer());
1115  return false;
1116 }
1117 
1118 void GCNPassConfig::addPreRegBankSelect() {
1119  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1120  addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
1121 }
1122 
1123 bool GCNPassConfig::addRegBankSelect() {
1124  addPass(new RegBankSelect());
1125  return false;
1126 }
1127 
1128 void GCNPassConfig::addPreGlobalInstructionSelect() {
1129  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1130  addPass(createAMDGPURegBankCombiner(IsOptNone));
1131 }
1132 
1133 bool GCNPassConfig::addGlobalInstructionSelect() {
1134  addPass(new InstructionSelect(getOptLevel()));
1135  return false;
1136 }
1137 
1138 void GCNPassConfig::addPreRegAlloc() {
1139  if (LateCFGStructurize) {
    addPass(createAMDGPUMachineCFGStructurizerPass());
  }
1142 }
1143 
1144 void GCNPassConfig::addFastRegAlloc() {
1145  // FIXME: We have to disable the verifier here because of PHIElimination +
1146  // TwoAddressInstructions disabling it.
1147 
1148  // This must be run immediately after phi elimination and before
1149  // TwoAddressInstructions, otherwise the processing of the tied operand of
1150  // SI_ELSE will introduce a copy of the tied operand source after the else.
1151  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
  insertPass(&TwoAddressInstructionPassID, &SIPreAllocateWWMRegsID);

  TargetPassConfig::addFastRegAlloc();
}
1158 
1159 void GCNPassConfig::addOptimizedRegAlloc() {
1160  // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
1161  // instructions that cause scheduling barriers.
  insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
  insertPass(&MachineSchedulerID, &SIPreAllocateWWMRegsID);
1164 
  if (OptExecMaskPreRA)
    insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);

1169  // This must be run immediately after phi elimination and before
1170  // TwoAddressInstructions, otherwise the processing of the tied operand of
1171  // SI_ELSE will introduce a copy of the tied operand source after the else.
1172  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
1173 
  if (EnableDCEInRA)
    insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);

  TargetPassConfig::addOptimizedRegAlloc();
}
1179 
1180 bool GCNPassConfig::addPreRewrite() {
1181  if (EnableRegReassign) {
1182  addPass(&GCNNSAReassignID);
    addPass(&GCNRegBankReassignID);
  }
1185  return true;
1186 }
1187 
1188 void GCNPassConfig::addPostRegAlloc() {
1189  addPass(&SIFixVGPRCopiesID);
1190  if (getOptLevel() > CodeGenOpt::None)
1191  addPass(&SIOptimizeExecMaskingID);
  TargetPassConfig::addPostRegAlloc();

1194  // Equivalent of PEI for SGPRs.
1195  addPass(&SILowerSGPRSpillsID);
1196 }
1197 
1198 void GCNPassConfig::addPreSched2() {
1199  addPass(&SIPostRABundlerID);
1200 }
1201 
1202 void GCNPassConfig::addPreEmitPass() {
1203  addPass(createSIMemoryLegalizerPass());
1204  addPass(createSIInsertWaitcntsPass());
1205  addPass(createSIShrinkInstructionsPass());
1206  addPass(createSIModeRegisterPass());
1207 
1208  if (getOptLevel() > CodeGenOpt::None)
1209  addPass(&SIInsertHardClausesID);
1210 
1211  addPass(&SILateBranchLoweringPassID);
1212  if (getOptLevel() > CodeGenOpt::None)
1213  addPass(&SIPreEmitPeepholeID);
1214  // The hazard recognizer that runs as part of the post-ra scheduler does not
  // guarantee to be able to handle all hazards correctly. This is because if there
1216  // are multiple scheduling regions in a basic block, the regions are scheduled
1217  // bottom up, so when we begin to schedule a region we don't know what
1218  // instructions were emitted directly before it.
1219  //
1220  // Here we add a stand-alone hazard recognizer pass which can handle all
1221  // cases.
1222  addPass(&PostRAHazardRecognizerID);
1223  addPass(&BranchRelaxationPassID);
1224 }
1225 
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(*this, PM);
1228 }
1229 
yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const {
  return new yaml::SIMachineFunctionInfo();
1232 }

yaml::MachineFunctionInfo *
GCNTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  return new yaml::SIMachineFunctionInfo(*MFI,
                                         *MF.getSubtarget().getRegisterInfo());
1239 }

bool GCNTargetMachine::parseMachineFunctionInfo(
    const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
    SMDiagnostic &Error, SMRange &SourceRange) const {
1244  const yaml::SIMachineFunctionInfo &YamlMFI =
1245  reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
1246  MachineFunction &MF = PFS.MF;
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

1249  MFI->initializeBaseYamlFields(YamlMFI);
1250 
1251  if (MFI->Occupancy == 0) {
1252  // Fixup the subtarget dependent default value.
1253  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1254  MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
1255  }
1256 
1257  auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
1258  Register TempReg;
1259  if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
1260  SourceRange = RegName.SourceRange;
1261  return true;
1262  }
1263  RegVal = TempReg;
1264 
1265  return false;
1266  };
1267 
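  // diagnoseRegisterClass reports a named register from the MIR YAML that
  // parsed successfully but does not belong to the register class expected
  // for that field (e.g. a VGPR where an SGPR_32 is required).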
1268  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
1269  // Create a diagnostic for a the register string literal.
1270  const MemoryBuffer &Buffer =
1271  *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
1272  Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1273  RegName.Value.size(), SourceMgr::DK_Error,
1274  "incorrect register class for field", RegName.Value,
1275  None, None);
1276  SourceRange = RegName.SourceRange;
1277  return true;
1278  };
1279 
1280  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1281  parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1282  parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1283  return true;
1284 
1285  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1286  !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
1287  return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1288  }
1289 
1290  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1291  !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1292  return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1293  }
1294 
1295  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1296  !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1297  return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1298  }
1299 
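  // parseAndCheckArgument translates an optional SIArgument from the YAML
  // block into an ArgDescriptor: either a named register checked against the
  // expected register class, or a stack offset, with an optional mask applied.
  // It also accumulates the user/system SGPR counts for the function info.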
1300  auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
1301  const TargetRegisterClass &RC,
1302  ArgDescriptor &Arg, unsigned UserSGPRs,
1303  unsigned SystemSGPRs) {
1304  // Skip parsing if it's not present.
1305  if (!A)
1306  return false;
1307 
1308  if (A->IsRegister) {
1309  Register Reg;
1310  if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
1311  SourceRange = A->RegisterName.SourceRange;
1312  return true;
1313  }
1314  if (!RC.contains(Reg))
1315  return diagnoseRegisterClass(A->RegisterName);
      Arg = ArgDescriptor::createRegister(Reg);
    } else
1318  Arg = ArgDescriptor::createStack(A->StackOffset);
1319  // Check and apply the optional mask.
1320  if (A->Mask)
1321  Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
1322 
1323  MFI->NumUserSGPRs += UserSGPRs;
1324  MFI->NumSystemSGPRs += SystemSGPRs;
1325  return false;
1326  };
1327 
1328  if (YamlMFI.ArgInfo &&
1329  (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
1330  AMDGPU::SGPR_128RegClass,
1331  MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
1332  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
1333  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
1334  2, 0) ||
1335  parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1336  MFI->ArgInfo.QueuePtr, 2, 0) ||
1337  parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
1338  AMDGPU::SReg_64RegClass,
1339  MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
1340  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
1341  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
1342  2, 0) ||
1343  parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
1344  AMDGPU::SReg_64RegClass,
1345  MFI->ArgInfo.FlatScratchInit, 2, 0) ||
1346  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
1347  AMDGPU::SGPR_32RegClass,
1348  MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
1349  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
1350  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
1351  0, 1) ||
1352  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
1353  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
1354  0, 1) ||
1355  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
1356  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
1357  0, 1) ||
1358  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
1359  AMDGPU::SGPR_32RegClass,
1360  MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
1361  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
1362  AMDGPU::SGPR_32RegClass,
1363  MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
1364  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
1365  AMDGPU::SReg_64RegClass,
1366  MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
1367  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
1368  AMDGPU::SReg_64RegClass,
1369  MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
1370  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
1371  AMDGPU::VGPR_32RegClass,
1372  MFI->ArgInfo.WorkItemIDX, 0, 0) ||
1373  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
1374  AMDGPU::VGPR_32RegClass,
1375  MFI->ArgInfo.WorkItemIDY, 0, 0) ||
1376  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
1377  AMDGPU::VGPR_32RegClass,
1378  MFI->ArgInfo.WorkItemIDZ, 0, 0)))
1379  return true;
1380 
  MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
  MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
  MFI->Mode.FP32InputDenormals = YamlMFI.Mode.FP32InputDenormals;
  MFI->Mode.FP32OutputDenormals = YamlMFI.Mode.FP32OutputDenormals;
  MFI->Mode.FP64FP16InputDenormals = YamlMFI.Mode.FP64FP16InputDenormals;
  MFI->Mode.FP64FP16OutputDenormals = YamlMFI.Mode.FP64FP16OutputDenormals;

  return false;
1389 }
llvm::AAResults::addAAResult
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
Definition: AliasAnalysis.h:465
llvm::initializeR600ControlFlowFinalizerPass
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
llvm::TargetPassConfig::addPostRegAlloc
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
Definition: TargetPassConfig.h:415
llvm::createR600ExpandSpecialInstrsPass
FunctionPass * createR600ExpandSpecialInstrsPass()
Definition: R600ExpandSpecialInstrs.cpp:57
EnableDCEInRA
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
llvm::TargetMachine::getOptLevel
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:198
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1221
llvm::AMDGPUAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: AMDGPUAliasAnalysis.h:50
llvm::ArgDescriptor::createStack
static constexpr ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
Definition: AMDGPUArgumentUsageInfo.h:49
llvm::AMDGPUFunctionArgInfo::QueuePtr
ArgDescriptor QueuePtr
Definition: AMDGPUArgumentUsageInfo.h:126
llvm::AMDGPUTargetMachine::EnableFixedFunctionABI
static bool EnableFixedFunctionABI
Definition: AMDGPUTargetMachine.h:37
EnableLowerModuleLDS
static cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
llvm::initializeR600PacketizerPass
void initializeR600PacketizerPass(PassRegistry &)
LLVMInitializeAMDGPUTarget
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget()
Definition: AMDGPUTargetMachine.cpp:201
llvm::AMDGPUAS::UNKNOWN_ADDRESS_SPACE
@ UNKNOWN_ADDRESS_SPACE
Definition: AMDGPU.h:413
llvm::InferAddressSpacesPass
Definition: InferAddressSpaces.h:16
EnableSIModeRegisterPass
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
llvm::PerFunctionMIParsingState::SM
SourceMgr * SM
Definition: MIParser.h:163
llvm
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::Attribute::isValid
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:155
llvm::AMDGPUTargetMachine::registerDefaultAliasAnalyses
void registerDefaultAliasAnalyses(AAManager &) override
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Definition: AMDGPUTargetMachine.cpp:485
mustPreserveGV
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
Definition: AMDGPUTargetMachine.cpp:413
llvm::createSeparateConstOffsetFromGEPPass
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Definition: SeparateConstOffsetFromGEP.cpp:499
llvm::StringRef::empty
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:156
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
llvm::GCNTargetMachine::convertFuncInfoToYAML
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
Definition: AMDGPUTargetMachine.cpp:1235
llvm::AMDGPULowerModuleLDSPass
Definition: AMDGPU.h:162
llvm::initializeR600ExpandSpecialInstrsPassPass
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
llvm::initializeAMDGPUPostLegalizerCombinerPass
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
llvm::initializeAMDGPUPromoteAllocaPass
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
llvm::createSIMemoryLegalizerPass
FunctionPass * createSIMemoryLegalizerPass()
Definition: SIMemoryLegalizer.cpp:1791
llvm::SILowerSGPRSpillsID
char & SILowerSGPRSpillsID
Definition: SILowerSGPRSpills.cpp:78
llvm::Wave32
@ Wave32
Definition: AMDGPUMCTargetDesc.h:34
llvm::PassBuilder::registerPipelineStartEPCallback
void registerPipelineStartEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:608
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:229
llvm::TargetOptions
Definition: TargetOptions.h:123
llvm::AMDGPUAlwaysInlinePass
Definition: AMDGPU.h:270
llvm::yaml::SIMachineFunctionInfo::ArgInfo
Optional< SIArgumentInfo > ArgInfo
Definition: SIMachineFunctionInfo.h:289
SIMachineFunctionInfo.h
Scalar.h
llvm::ArgDescriptor::createArg
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
Definition: AMDGPUArgumentUsageInfo.h:54
createMinRegScheduler
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:299
llvm::ArgDescriptor
Definition: AMDGPUArgumentUsageInfo.h:23
llvm::Function
Definition: Function.h:61
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:456
llvm::Attribute
Definition: Attributes.h:52
llvm::AMDGPU::SIModeRegisterDefaults::FP32OutputDenormals
bool FP32OutputDenormals
Definition: AMDGPUBaseInfo.h:899
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::initializeAMDGPUAlwaysInlinePass
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
llvm::yaml::MachineFunctionInfo
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
Definition: MIRYamlMapping.h:626
llvm::PHIEliminationID
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions.
Definition: PHIElimination.cpp:129
llvm::initializeSIInsertHardClausesPass
void initializeSIInsertHardClausesPass(PassRegistry &)
llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
llvm::initializeSIPreAllocateWWMRegsPass
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::initializeAMDGPUPropagateAttributesLatePass
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
InferAddressSpaces.h
llvm::AMDGPU::SIModeRegisterDefaults::IEEE
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
Definition: AMDGPUBaseInfo.h:890
llvm::createAlwaysInlinerLegacyPass
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
Definition: AlwaysInliner.cpp:169
getGPUOrDefault
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
Definition: AMDGPUTargetMachine.cpp:357
R600MachineScheduler.h
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:124
llvm::AMDGPUPromoteAllocaToVectorPass
Definition: AMDGPU.h:255
llvm::initializeAMDGPULateCodeGenPreparePass
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &)
llvm::createFixIrreduciblePass
FunctionPass * createFixIrreduciblePass()
Definition: FixIrreducible.cpp:103
llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition: MachineScheduler.h:135
llvm::Triple::amdgcn
@ amdgcn
Definition: Triple.h:72
GCNSchedStrategy.h
llvm::GCNIterativeScheduler::SCHEDULE_ILP
@ SCHEDULE_ILP
Definition: GCNIterativeScheduler.h:37
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:693
llvm::createAMDGPULateCodeGenPreparePass
FunctionPass * createAMDGPULateCodeGenPreparePass()
Definition: AMDGPULateCodeGenPrepare.cpp:193
llvm::createSILowerI1CopiesPass
FunctionPass * createSILowerI1CopiesPass()
Definition: SILowerI1Copies.cpp:413
llvm::initializeR600ClauseMergePassPass
void initializeR600ClauseMergePassPass(PassRegistry &)
llvm::GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY
@ SCHEDULE_LEGACYMAXOCCUPANCY
Definition: GCNIterativeScheduler.h:36
llvm::AMDGPU::RM_BOTH
@ RM_BOTH
Definition: AMDGPU.h:81
llvm::createFlattenCFGPass
FunctionPass * createFlattenCFGPass()
Definition: FlattenCFGPass.cpp:52
llvm::InternalizePass
A pass that internalizes all functions and variables other than those that must be preserved accordin...
Definition: Internalize.h:36
llvm::initializeSIOptimizeExecMaskingPreRAPass
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
llvm::AMDGPUFunctionArgInfo::FlatScratchInit
ArgDescriptor FlatScratchInit
Definition: AMDGPUArgumentUsageInfo.h:129
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::createEarlyCSEPass
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1699
llvm::Wave64
@ Wave64
Definition: AMDGPUMCTargetDesc.h:34
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::initializeSILowerI1CopiesPass
void initializeSILowerI1CopiesPass(PassRegistry &)
llvm::SIPreEmitPeepholeID
char & SIPreEmitPeepholeID
llvm::createAMDGPUPostLegalizeCombiner
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPostLegalizerCombiner.cpp:390
llvm::initializeAMDGPUDAGToDAGISelPass
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
llvm::initializeSIPeepholeSDWAPass
void initializeSIPeepholeSDWAPass(PassRegistry &)
llvm::SILowerControlFlowID
char & SILowerControlFlowID
Definition: SILowerControlFlow.cpp:165
llvm::yaml::SIMachineFunctionInfo
Definition: SIMachineFunctionInfo.h:269
llvm::AMDGPUMachineFunction::getLDSSize
unsigned getLDSSize() const
Definition: AMDGPUMachineFunction.h:70
llvm::createAMDGPUUnifyMetadataPass
ModulePass * createAMDGPUUnifyMetadataPass()
InstructionSelect.h
EnableStructurizerWorkarounds
static cl::opt< bool > EnableStructurizerWorkarounds("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden)
llvm::AMDGPUAAWrapperPass
Legacy wrapper pass to provide the AMDGPUAAResult object.
Definition: AMDGPUAliasAnalysis.h:64
llvm::createCGSCCToFunctionPassAdaptor
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:494
EnableAtomicOptimizations
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
createGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:282
llvm::Optional< Reloc::Model >
llvm::GCNScheduleDAGMILive
Definition: GCNSchedStrategy.h:73
llvm::initializeSIFoldOperandsPass
void initializeSIFoldOperandsPass(PassRegistry &)
llvm::createBarrierNoopPass
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
Definition: BarrierNoopPass.cpp:43
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition: AMDGPUISelDAGToDAG.cpp:380
InternalizeSymbols
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
llvm::initializeGlobalISel
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
Definition: GlobalISel.cpp:18
llvm::AMDGPU::SIModeRegisterDefaults::FP32InputDenormals
bool FP32InputDenormals
If this is set, neither input or output denormals are flushed for most f32 instructions.
Definition: AMDGPUBaseInfo.h:898
llvm::PassBuilder::registerAnalysisRegistrationCallback
void registerAnalysisRegistrationCallback(const std::function< void(CGSCCAnalysisManager &)> &C)
{{@ Register callbacks for analysis registration with this PassBuilder instance.
Definition: PassBuilder.h:641
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
SIMachineScheduler.h
llvm::yaml::SIMode::FP32OutputDenormals
bool FP32OutputDenormals
Definition: SIMachineFunctionInfo.h:233
llvm::createGVNPass
FunctionPass * createGVNPass(bool NoMemDepAnalysis=false)
Create a legacy GVN pass.
Definition: GVN.cpp:2967
llvm::AMDGPUFunctionArgInfo::PrivateSegmentSize
ArgDescriptor PrivateSegmentSize
Definition: AMDGPUArgumentUsageInfo.h:130
llvm::createR600OpenCLImageTypeLoweringPass
ModulePass * createR600OpenCLImageTypeLoweringPass()
Definition: R600OpenCLImageTypeLoweringPass.cpp:372
llvm::createR600ClauseMergePass
FunctionPass * createR600ClauseMergePass()
Definition: R600ClauseMergePass.cpp:209
llvm::AMDGPUUseNativeCallsPass
Definition: AMDGPU.h:95
llvm::AMDGPUFunctionArgInfo::DispatchPtr
ArgDescriptor DispatchPtr
Definition: AMDGPUArgumentUsageInfo.h:125
llvm::initializeAMDGPUPropagateAttributesEarlyPass
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
llvm::SIPreAllocateWWMRegsID
char & SIPreAllocateWWMRegsID
Definition: SIPreAllocateWWMRegs.cpp:81
llvm::SIPostRABundlerID
char & SIPostRABundlerID
Definition: SIPostRABundler.cpp:69
llvm::initializeSIShrinkInstructionsPass
void initializeSIShrinkInstructionsPass(PassRegistry &)
LegacyPassManager.h
llvm::TwoAddressInstructionPassID
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
Definition: TwoAddressInstructionPass.cpp:192
PassManagerBuilder.h
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:141
llvm::GCNTargetMachine::parseMachineFunctionInfo
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target's MachineFunctionInfo from the YAML reprsentation.
Definition: AMDGPUTargetMachine.cpp:1241
llvm::initializeAMDGPUSimplifyLibCallsPass
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
Internalize.h
createSIMachineScheduler
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:277
llvm::MemoryBuffer
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:50
llvm::AMDGPUMachineFunction::Mode
AMDGPU::SIModeRegisterDefaults Mode
Definition: AMDGPUMachineFunction.h:44
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::createAMDGPUExternalAAWrapperPass
ImmutablePass * createAMDGPUExternalAAWrapperPass()
Definition: AMDGPUAliasAnalysis.cpp:36
llvm::AMDGPUFunctionArgInfo::DispatchID
ArgDescriptor DispatchID
Definition: AMDGPUArgumentUsageInfo.h:128
llvm::initializeAMDGPULowerIntrinsicsPass
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
llvm::initializeGCNDPPCombinePass
void initializeGCNDPPCombinePass(PassRegistry &)
llvm::AMDGPUUnifyMetadataPass
Definition: AMDGPU.h:294
llvm::PassBuilder::OptimizationLevel::O0
static const OptimizationLevel O0
Disable as many optimizations as possible.
Definition: PassBuilder.h:183
llvm::AMDGPUFunctionArgInfo::ImplicitArgPtr
ArgDescriptor ImplicitArgPtr
Definition: AMDGPUArgumentUsageInfo.h:141
EnableSDWAPeephole
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::SIOptimizeExecMaskingID
char & SIOptimizeExecMaskingID
Definition: SIOptimizeExecMasking.cpp:52
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:205
llvm::initializeAMDGPUUnifyMetadataPass
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
llvm::yaml::SIMachineFunctionInfo::FrameOffsetReg
StringValue FrameOffsetReg
Definition: SIMachineFunctionInfo.h:286
llvm::initializeAMDGPUArgumentUsageInfoPass
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
SISchedRegistry
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
GCNIterativeScheduler.h
llvm::AMDGPUFunctionArgInfo::WorkGroupIDX
ArgDescriptor WorkGroupIDX
Definition: AMDGPUArgumentUsageInfo.h:133
llvm::GCNTargetMachine::GCNTargetMachine
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
Definition: AMDGPUTargetMachine.cpp:699
llvm::createInferAddressSpacesPass
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
Definition: InferAddressSpaces.cpp:1199
llvm::initializeSILateBranchLoweringPass
void initializeSILateBranchLoweringPass(PassRegistry &)
AMDGPUAliasAnalysis.h
llvm::AMDGPUTargetMachine
Definition: AMDGPUTargetMachine.h:27
llvm::createAMDGPUUseNativeCallsPass
FunctionPass * createAMDGPUUseNativeCallsPass()
Definition: AMDGPULibCalls.cpp:1702
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
llvm::createR600Packetizer
FunctionPass * createR600Packetizer()
Definition: R600Packetizer.cpp:411
AlwaysInliner.h
llvm::R600TargetMachine::getTargetTransformInfo
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
Definition: AMDGPUTargetMachine.cpp:691
llvm::AAResults
Definition: AliasAnalysis.h:456
llvm::yaml::SIMode::FP32InputDenormals
bool FP32InputDenormals
Definition: SIMachineFunctionInfo.h:232
llvm::PassBuilder::registerParseAACallback
void registerParseAACallback(const std::function< bool(StringRef Name, AAManager &AA)> &C)
Register a callback for parsing an AliasAnalysis Name to populate the given AAManager AA.
Definition: PassBuilder.h:633
llvm::X86AS::FS
@ FS
Definition: X86.h:183
ScalarizeGlobal
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
llvm::createNaryReassociatePass
FunctionPass * createNaryReassociatePass()
Definition: NaryReassociate.cpp:165
llvm::PostRAHazardRecognizerID
char & PostRAHazardRecognizerID
PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Definition: PostRAHazardRecognizer.cpp:64
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:653
llvm::initializeAMDGPULowerKernelArgumentsPass
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
llvm::initializeSIWholeQuadModePass
void initializeSIWholeQuadModePass(PassRegistry &)
llvm::initializeAMDGPUAtomicOptimizerPass
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
llvm::getTheAMDGPUTarget
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
Definition: AMDGPUTargetInfo.cpp:20
llvm::Legalizer
Definition: Legalizer.h:31
llvm::AMDGPUFunctionArgInfo::WorkItemIDX
ArgDescriptor WorkItemIDX
Definition: AMDGPUArgumentUsageInfo.h:148
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
EnableAMDGPUAliasAnalysis
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
EnableLowerKernelArguments
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
EnableLoadStoreVectorizer
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
AMDGPUTargetInfo.h
llvm::createAMDGPULowerModuleLDSPass
ModulePass * createAMDGPULowerModuleLDSPass()
llvm::FuncletLayoutID
char & FuncletLayoutID
This pass lays out funclets contiguously.
Definition: FuncletLayout.cpp:39
AMDGPUMacroFusion.h
llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
llvm::createSIInsertWaitcntsPass
FunctionPass * createSIInsertWaitcntsPass()
Definition: SIInsertWaitcnts.cpp:797
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::PassBuilder
This class provides access to building LLVM's passes.
Definition: PassBuilder.h:139
EnableRegReassign
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::SMDiagnostic
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:251
llvm::yaml::SIMode::FP64FP16InputDenormals
bool FP64FP16InputDenormals
Definition: SIMachineFunctionInfo.h:234
llvm::createAMDGPUAnnotateUniformValues
FunctionPass * createAMDGPUAnnotateUniformValues()
Definition: AMDGPUAnnotateUniformValues.cpp:186
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:373
llvm::createR600EmitClauseMarkers
FunctionPass * createR600EmitClauseMarkers()
Definition: R600EmitClauseMarkers.cpp:336
llvm::initializeAMDGPUUnifyDivergentExitNodesPass
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
llvm::EarlyIfConverterID
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.
Definition: EarlyIfConversion.cpp:730
llvm::AMDGPUPromoteAllocaPass
Definition: AMDGPU.h:247
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::createAtomicExpandPass
FunctionPass * createAtomicExpandPass()
llvm::InstructionSelect
This pass is responsible for selecting generic machine instructions to target-specific instructions.
Definition: InstructionSelect.h:31
llvm::initializeGCNRegBankReassignPass
void initializeGCNRegBankReassignPass(PassRegistry &)
llvm::AMDGPUTargetMachine::getNullPointerValue
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
Definition: AMDGPUTargetMachine.cpp:657
llvm::RegisterTargetMachine
RegisterTargetMachine - Helper template for registering a target machine implementation,...
Definition: TargetRegistry.h:1118
llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:317
llvm::PassRegistry
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::Triple::r600
@ r600
Definition: Triple.h:71
llvm::createUnifyLoopExitsPass
FunctionPass * createUnifyLoopExitsPass()
Definition: UnifyLoopExits.cpp:53
llvm::GCNIterativeScheduler
Definition: GCNIterativeScheduler.h:29
createTLOF
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:269
llvm::SourceMgr::getMainFileID
unsigned getMainFileID() const
Definition: SourceMgr.h:129
AMDGPUTargetObjectFile.h
llvm::AMDGPULowerKernelAttributesPass
Definition: AMDGPU.h:130
GVN.h
llvm::createAMDGPUPropagateAttributesLatePass
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:406
llvm::initializeSIMemoryLegalizerPass
void initializeSIMemoryLegalizerPass(PassRegistry &)
EnableDPPCombine
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
llvm::createAMDGPULowerIntrinsicsPass
ModulePass * createAMDGPULowerIntrinsicsPass()
Definition: AMDGPULowerIntrinsics.cpp:180
llvm::StackMapLivenessID
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
Definition: StackMapLivenessAnalysis.cpp:86
llvm::createAMDGPUAnnotateKernelFeaturesPass
Pass * createAMDGPUAnnotateKernelFeaturesPass()
Definition: AMDGPUAnnotateKernelFeatures.cpp:424
llvm::AMDGPUTargetMachine::~AMDGPUTargetMachine
~AMDGPUTargetMachine() override
llvm::AMDGPUTargetMachine::getSubtargetImpl
const TargetSubtargetInfo * getSubtargetImpl() const
llvm::createSinkingPass
FunctionPass * createSinkingPass()
Definition: Sink.cpp:284
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:374
llvm::createSpeculativeExecutionPass
FunctionPass * createSpeculativeExecutionPass()
Definition: SpeculativeExecution.cpp:325
Utils.h
llvm::SILoadStoreOptimizerID
char & SILoadStoreOptimizerID
Definition: SILoadStoreOptimizer.cpp:576
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:304
llvm::None
const NoneType None
Definition: None.h:23
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:357
llvm::createAMDGPUExportClusteringDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()
Definition: AMDGPUExportClustering.cpp:144
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::TargetMachine::resetTargetOptions
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
Definition: TargetMachine.cpp:56
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1336
llvm::SmallString< 128 >
llvm::SourceMgr::getMemoryBuffer
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:122
llvm::createFunctionInliningPass
Pass * createFunctionInliningPass()
createFunctionInliningPass - Return a new pass object that uses a heuristic to inline direct function...
Definition: InlineSimple.cpp:97
llvm::legacy::PassManagerBase::add
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
llvm::R600TTIImpl
Definition: AMDGPUTargetTransformInfo.h:225
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:378
llvm::MemoryBuffer::getBufferIdentifier
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:75
llvm::createAMDGPUAAWrapperPass
ImmutablePass * createAMDGPUAAWrapperPass()
Definition: AMDGPUAliasAnalysis.cpp:32
llvm::PassManagerBuilder
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
Definition: PassManagerBuilder.h:59
llvm::createLowerSwitchPass
FunctionPass * createLowerSwitchPass()
Definition: LowerSwitch.cpp:582
llvm::createAMDGPUPrintfRuntimeBinding
ModulePass * createAMDGPUPrintfRuntimeBinding()
Definition: AMDGPUPrintfRuntimeBinding.cpp:92
AMDGPUTargetTransformInfo.h
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:190
llvm::createAMDGPUAlwaysInlinePass
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Definition: AMDGPUAlwaysInlinePass.cpp:158
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:67
llvm::initializeSILowerSGPRSpillsPass
void initializeSILowerSGPRSpillsPass(PassRegistry &)
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:555
llvm::PassBuilder::registerPipelineEarlySimplificationEPCallback
void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:617
llvm::AMDGPUTargetMachine::getFeatureString
StringRef getFeatureString(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:405
R600SchedRegistry
static MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler)
llvm::cl::opt< bool >
llvm::createLCSSAPass
Pass * createLCSSAPass()
Definition: LCSSA.cpp:484
EnableR600StructurizeCFG
static cl::opt< bool > EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
llvm::createModuleToFunctionPassAdaptor
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1228
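For orientation, a minimal sketch of how this adaptor hoists a function pass into a module pipeline; the pass and function names below are illustrative and not taken from this file:
// Sketch only: run a function pass per function from within a ModulePassManager.
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
using namespace llvm;
void buildExamplePipeline(ModulePassManager &MPM) {
  // SimplifyCFGPass operates on functions; the adaptor maps it over every function in the module.
  MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));
}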
llvm::TargetMachine::TargetTriple
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with.
Definition: TargetMachine.h:96
OptExecMaskPreRA
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
llvm::R600Subtarget
Definition: R600Subtarget.h:36
llvm::yaml::SIMachineFunctionInfo::ScratchRSrcReg
StringValue ScratchRSrcReg
Definition: SIMachineFunctionInfo.h:285
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::AMDGPUUnifyDivergentExitNodesID
char & AMDGPUUnifyDivergentExitNodesID
Definition: AMDGPUUnifyDivergentExitNodes.cpp:73
llvm::R600TargetMachine::createPassConfig
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
Definition: AMDGPUTargetMachine.cpp:1015
llvm::initializeSIInsertWaitcntsPass
void initializeSIInsertWaitcntsPass(PassRegistry &)
llvm::TargetMachine::setRequiresStructuredCFG
void setRequiresStructuredCFG(bool Value)
Definition: TargetMachine.h:214
llvm::initializeSIAnnotateControlFlowPass
void initializeSIAnnotateControlFlowPass(PassRegistry &)
llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3445
llvm::AMDGPUFunctionArgInfo::WorkGroupIDZ
ArgDescriptor WorkGroupIDZ
Definition: AMDGPUArgumentUsageInfo.h:135
llvm::EngineKind::JIT
@ JIT
Definition: ExecutionEngine.h:525
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
llvm::DetectDeadLanesID
char & DetectDeadLanesID
This pass adds dead/undef flags after analyzing subregister lanes.
Definition: DetectDeadLanes.cpp:128
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition: TargetMachine.h:206
llvm::AMDGPUFunctionArgInfo::PrivateSegmentBuffer
ArgDescriptor PrivateSegmentBuffer
Definition: AMDGPUArgumentUsageInfo.h:124
llvm::createAMDGPUAtomicOptimizerPass
FunctionPass * createAMDGPUAtomicOptimizerPass()
Definition: AMDGPUAtomicOptimizer.cpp:707
llvm::initializeR600VectorRegMergerPass
void initializeR600VectorRegMergerPass(PassRegistry &)
IPO.h
llvm::SIPeepholeSDWAID
char & SIPeepholeSDWAID
Definition: SIPeepholeSDWA.cpp:191
llvm::createGlobalDCEPass
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
llvm::FinalizeMachineBundlesID
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalizes machine instruction bundles (created earlier,...
Definition: MachineInstrBundle.cpp:98
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::GCNTTIImpl
Definition: AMDGPUTargetTransformInfo.h:62
llvm::SIFixVGPRCopiesID
char & SIFixVGPRCopiesID
Definition: SIFixVGPRCopies.cpp:45
llvm::initializeAMDGPURewriteOutArgumentsPass
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
CGSCCPassManager.h
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition: MachineScheduler.h:119
llvm::GCNIterativeScheduler::SCHEDULE_MINREGFORCED
@ SCHEDULE_MINREGFORCED
Definition: GCNIterativeScheduler.h:35
createR600MachineScheduler
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:273
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::AMDGPUSimplifyLibCallsPass
Definition: AMDGPU.h:87
llvm::TargetPassConfig::addIRPasses
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition: TargetPassConfig.cpp:800
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::TargetPassConfig::addOptimizedRegAlloc
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
Definition: TargetPassConfig.cpp:1353
llvm::AMDGPUFunctionArgInfo::PrivateSegmentWaveByteOffset
ArgDescriptor PrivateSegmentWaveByteOffset
Definition: AMDGPUArgumentUsageInfo.h:137
llvm::SIFormMemoryClausesID
char & SIFormMemoryClausesID
Definition: SIFormMemoryClauses.cpp:92
LateCFGStructurize
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
TargetPassConfig.h
llvm::createExternalAAWrapperPass
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
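As a rough illustration of the callback pattern this wrapper expects, a sketch that forwards the AMDGPU alias-analysis result when its wrapper pass has already been scheduled (written for a TargetPassConfig-style addPass context; not necessarily the exact code in this file):
// Sketch: populate AAResults from the AMDGPU AA wrapper pass, if it is available.
addPass(createAMDGPUAAWrapperPass());
addPass(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
    AAR.addAAResult(WrapperPass->getResult());
}));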
llvm::SIFixSGPRCopiesID
char & SIFixSGPRCopiesID
Definition: SIFixSGPRCopies.cpp:121
llvm::AMDGPUFunctionArgInfo::WorkGroupIDY
ArgDescriptor WorkGroupIDY
Definition: AMDGPUArgumentUsageInfo.h:134
Localizer.h
llvm::MachineCSEID
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:153
llvm::GCNDPPCombineID
char & GCNDPPCombineID
Definition: GCNDPPCombine.cpp:111
llvm::TargetPassConfig::addCodeGenPrepare
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
Definition: TargetPassConfig.cpp:924
llvm::AMDGPU::SIModeRegisterDefaults::DX10Clamp
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
Definition: AMDGPUBaseInfo.h:894
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SIInsertHardClausesID
char & SIInsertHardClausesID
Definition: SIInsertHardClauses.cpp:201
GCNMinRegSchedRegistry
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
llvm::AMDGPU::isFlatGlobalAddrSpace
bool isFlatGlobalAddrSpace(unsigned AS)
Definition: AMDGPU.h:420
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16InputDenormals
bool FP64FP16InputDenormals
If this is set, neither input nor output denormals are flushed for both f64 and f16/v2f16 instructions...
Definition: AMDGPUBaseInfo.h:903
llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition: AMDGPUTargetInfo.cpp:25
llvm::initializeSIOptimizeExecMaskingPass
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
llvm::initializeSIPostRABundlerPass
void initializeSIPostRABundlerPass(PassRegistry &)
llvm::SIScheduleDAGMI
Definition: SIMachineScheduler.h:426
llvm::PassBuilder::registerPipelineParsingCallback
void registerPipelineParsingCallback(const std::function< bool(StringRef Name, CGSCCPassManager &, ArrayRef< PipelineElement >)> &C)
Register pipeline parsing callbacks with this pass builder instance.
Definition: PassBuilder.h:663
llvm::initializeAMDGPUAAWrapperPassPass
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
llvm::initializeAMDGPUCodeGenPreparePass
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
llvm::initializeGCNNSAReassignPass
void initializeGCNNSAReassignPass(PassRegistry &)
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::AMDGPUTargetMachine::EnableLowerModuleLDS
static bool EnableLowerModuleLDS
Definition: AMDGPUTargetMachine.h:38
llvm::yaml::StringValue
A wrapper around std::string which contains a source range that's being set during parsing.
Definition: MIRYamlMapping.h:34
llvm::GlobalDCEPass
Pass to remove unused function declarations.
Definition: GlobalDCE.h:29
llvm::PatchableFunctionID
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
Definition: PatchableFunction.cpp:96
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:649
IterativeGCNMaxOccupancySchedRegistry
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
AMDGPUExportClustering.h
llvm::AMDGPUFunctionArgInfo::WorkItemIDZ
ArgDescriptor WorkItemIDZ
Definition: AMDGPUArgumentUsageInfo.h:150
llvm::MachineFunction
Definition: MachineFunction.h:227
llvm::CodeGenOpt::None
@ None
Definition: CodeGen.h:53
llvm::createSIShrinkInstructionsPass
FunctionPass * createSIShrinkInstructionsPass()
llvm::createAMDGPUMachineCFGStructurizerPass
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
Definition: AMDGPUMachineCFGStructurizer.cpp:2886
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:96
EnableAMDGPUFunctionCallsOpt
static cl::opt< bool, true > EnableAMDGPUFunctionCallsOpt("amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true), cl::Hidden)
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:467
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:33
llvm::initializeSIFormMemoryClausesPass
void initializeSIFormMemoryClausesPass(PassRegistry &)
computeDataLayout
static StringRef computeDataLayout(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:341
llvm::Reloc::PIC_
@ PIC_
Definition: CodeGen.h:22
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::initializeAMDGPUExternalAAWrapperPass
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
AMDGPU.h
llvm::GCNTargetMachine::getTargetTransformInfo
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
Definition: AMDGPUTargetMachine.cpp:729
llvm::yaml::SIMachineFunctionInfo::StackPtrOffsetReg
StringValue StackPtrOffsetReg
Definition: SIMachineFunctionInfo.h:287
SimplifyLibCalls.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
GlobalDCE.h
llvm::yaml::SIMachineFunctionInfo::Mode
SIMode Mode
Definition: SIMachineFunctionInfo.h:290
llvm::getStandardCSEConfigForOpt
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
Definition: CSEInfo.cpp:73
llvm::createAMDGPURegBankCombiner
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
Definition: AMDGPURegBankCombiner.cpp:149
llvm::SIMachineFunctionInfo::initializeBaseYamlFields
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI)
Definition: SIMachineFunctionInfo.cpp:570
IRTranslator.h
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition: TargetMachine.h:125
EarlyInlineAll
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::AMDGPUFunctionArgInfo::ImplicitBufferPtr
ArgDescriptor ImplicitBufferPtr
Definition: AMDGPUArgumentUsageInfo.h:144
llvm::SIWholeQuadModeID
char & SIWholeQuadModeID
Definition: SIWholeQuadMode.cpp:265
llvm::getEffectiveRelocModel
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
Definition: AVRTargetMachine.cpp:39
EnableSROA
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
llvm::initializeAMDGPULowerKernelAttributesPass
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
llvm::getEffectiveCodeModel
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value.
Definition: TargetMachine.h:474
llvm::LLVMTargetMachine::initAsmInfo
void initAsmInfo()
Definition: LLVMTargetMachine.cpp:41
llvm::initializeAMDGPUAnnotateUniformValuesPass
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
llvm::AMDGPUPrintfRuntimeBindingPass
Definition: AMDGPU.h:285
llvm::createLoadStoreVectorizerPass
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
Definition: LoadStoreVectorizer.cpp:229
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::createStructurizeCFGPass
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...
Definition: StructurizeCFG.cpp:1086
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: AMDGPUBaseInfo.h:904
llvm::GCNTargetMachine::createPassConfig
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
Definition: AMDGPUTargetMachine.cpp:1226
llvm::PassManager< Module >
llvm::createAMDGPULowerKernelAttributesPass
ModulePass * createAMDGPULowerKernelAttributesPass()
Definition: AMDGPULowerKernelAttributes.cpp:258
llvm::initializeSIFixSGPRCopiesPass
void initializeSIFixSGPRCopiesPass(PassRegistry &)
llvm::PerFunctionMIParsingState
Definition: MIParser.h:160
llvm::AMDGPUFunctionArgInfo::WorkGroupInfo
ArgDescriptor WorkGroupInfo
Definition: AMDGPUArgumentUsageInfo.h:136
llvm::createAMDGPUPromoteAllocaToVector
FunctionPass * createAMDGPUPromoteAllocaToVector()
Definition: AMDGPUPromoteAlloca.cpp:1144
llvm::R600TargetMachine::R600TargetMachine
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
Definition: AMDGPUTargetMachine.cpp:622
llvm::createR600VectorRegMerger
FunctionPass * createR600VectorRegMerger()
Definition: R600OptimizeVectorRegisters.cpp:385
llvm::initializeAMDGPULowerModuleLDSPass
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
LLVM_READNONE
#define LLVM_READNONE
Definition: Compiler.h:205
createIterativeILPMachineScheduler
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:305
llvm::parseNamedRegisterReference
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)
Definition: MIParser.cpp:3238
EnableEarlyIfConversion
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
llvm::initializeSIFixVGPRCopiesPass
void initializeSIFixVGPRCopiesPass(PassRegistry &)
llvm::yaml::SIMode::DX10Clamp
bool DX10Clamp
Definition: SIMachineFunctionInfo.h:231
llvm::initializeAMDGPUPromoteAllocaToVectorPass
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
EnableScalarIRPasses
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
EnableR600IfConvert
static cl::opt< bool > EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
llvm::initializeSIPreEmitPeepholePass
void initializeSIPreEmitPeepholePass(PassRegistry &)
createIterativeGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:292
llvm::PassBuilder::OptimizationLevel
LLVM-provided high-level optimization levels.
Definition: PassBuilder.h:164
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:521
llvm::createR600ControlFlowFinalizer
FunctionPass * createR600ControlFlowFinalizer()
Definition: R600ControlFlowFinalizer.cpp:689
llvm::TargetPassConfig::addMachineSSAOptimization
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.
Definition: TargetPassConfig.cpp:1211
llvm::createAMDGPUPropagateAttributesEarlyPass
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:401
llvm::AMDGPUPropagateAttributesEarlyPass
Definition: AMDGPU.h:138
llvm::initializeSIModeRegisterPass
void initializeSIModeRegisterPass(PassRegistry &)
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:377
llvm::createLoadClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1573
RegBankSelect.h
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
GCNMaxOccupancySchedRegistry
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
llvm::createAMDGPULowerKernelArgumentsPass
FunctionPass * createAMDGPULowerKernelArgumentsPass()
Definition: AMDGPULowerKernelArguments.cpp:248
llvm::AMDGPUTargetMachine::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
Definition: AMDGPUTargetMachine.cpp:665
llvm::PassManagerBuilder::EP_ModuleOptimizerEarly
@ EP_ModuleOptimizerEarly
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
Definition: PassManagerBuilder.h:76
llvm::createSIModeRegisterPass
FunctionPass * createSIModeRegisterPass()
Definition: SIModeRegister.cpp:157
llvm::ArgDescriptor::createRegister
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Definition: AMDGPUArgumentUsageInfo.h:44
PassManager.h
llvm::createInternalizePass
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
Definition: Internalize.cpp:288
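A minimal sketch of how this pass is typically paired with GlobalDCE in a legacy pass manager, assuming a predicate that keeps entry kernels; the predicate here is illustrative, not the backend's actual must-preserve logic:
// Sketch only: internalize everything except entry kernels, then drop what became unreferenced.
PM.add(createInternalizePass([](const GlobalValue &GV) {
  const auto *F = dyn_cast<Function>(&GV);
  return F && AMDGPU::isEntryFunctionCC(F->getCallingConv());
}));
PM.add(createGlobalDCEPass());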
llvm::SourceMgr::DK_Error
@ DK_Error
Definition: SourceMgr.h:34
llvm::AMDGPUTargetMachine::adjustPassManager
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g.
Definition: AMDGPUTargetMachine.cpp:420
llvm::LLVMTargetMachine
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Definition: TargetMachine.h:386
llvm::DeadMachineInstructionElimID
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
Definition: DeadMachineInstructionElim.cpp:57
llvm::PerFunctionMIParsingState::MF
MachineFunction & MF
Definition: MIParser.h:162
GCNILPSchedRegistry
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
llvm::AnalysisManager::registerPass
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:847
llvm::AMDGPUFunctionArgInfo::KernargSegmentPtr
ArgDescriptor KernargSegmentPtr
Definition: AMDGPUArgumentUsageInfo.h:127
llvm::createAMDGPUPromoteAlloca
FunctionPass * createAMDGPUPromoteAlloca()
Definition: AMDGPUPromoteAlloca.cpp:1140
llvm::initializeAMDGPUPrintfRuntimeBindingPass
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
llvm::AAManager::registerFunctionAnalysis
void registerFunctionAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:1226
llvm::BranchRelaxationPassID
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
Definition: BranchRelaxation.cpp:119
llvm::initializeAMDGPUPreLegalizerCombinerPass
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
llvm::createAMDGPUCodeGenPreparePass
FunctionPass * createAMDGPUCodeGenPreparePass()
Definition: AMDGPUCodeGenPrepare.cpp:1418
llvm::MachineSchedulerID
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Definition: MachineScheduler.cpp:210
llvm::AMDGPUTargetMachine::EnableFunctionCalls
static bool EnableFunctionCalls
Definition: AMDGPUTargetMachine.h:36
Legalizer.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::createLICMPass
Pass * createLICMPass()
Definition: LICM.cpp:310
llvm::createAMDGPUFixFunctionBitcastsPass
ModulePass * createAMDGPUFixFunctionBitcastsPass()
llvm::GCNNSAReassignID
char & GCNNSAReassignID
Definition: GCNNSAReassign.cpp:104
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition: TargetMachine.h:124
llvm::PassManagerBuilder::EP_EarlyAsPossible
@ EP_EarlyAsPossible
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations,...
Definition: PassManagerBuilder.h:72
llvm::initializeAMDGPUAnnotateKernelFeaturesPass
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
llvm::AMDGPUFunctionArgInfo::WorkItemIDY
ArgDescriptor WorkItemIDY
Definition: AMDGPUArgumentUsageInfo.h:149
llvm::createAMDGPUPreLegalizeCombiner
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPreLegalizerCombiner.cpp:296
llvm::AMDGPUTargetMachine::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const override
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition: AMDGPUTargetMachine.cpp:671
llvm::SMRange
Represents a range in source code.
Definition: SMLoc.h:48
llvm::createStraightLineStrengthReducePass
FunctionPass * createStraightLineStrengthReducePass()
Definition: StraightLineStrengthReduce.cpp:269
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:331
llvm::initializeAMDGPUFixFunctionBitcastsPass
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &)
llvm::initializeSILoadStoreOptimizerPass
void initializeSILoadStoreOptimizerPass(PassRegistry &)
llvm::legacy::PassManagerBase
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
Definition: LegacyPassManager.h:39
llvm::IRTranslator
Definition: IRTranslator.h:62
llvm::PassBuilder::registerCGSCCOptimizerLateEPCallback
void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:587
llvm::initializeAMDGPURegBankCombinerPass
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
RegName
#define RegName(no)
llvm::createSIAnnotateControlFlowPass
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
Definition: SIAnnotateControlFlow.cpp:374
Vectorize.h
llvm::yaml::SIMode::IEEE
bool IEEE
Definition: SIMachineFunctionInfo.h:230
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::SIFoldOperandsID
char & SIFoldOperandsID
llvm::RegBankSelect
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition: RegBankSelect.h:91
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:372
llvm::EarlyMachineLICMID
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
Definition: MachineLICM.cpp:295
llvm::AMDGPUTargetMachine::getGPUName
StringRef getGPUName(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:400
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:376
llvm::cl::desc
Definition: CommandLine.h:411
llvm::ScheduleDAGMILive
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
Definition: MachineScheduler.h:384
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:119
llvm::PassManagerBuilder::EP_CGSCCOptimizerLate
@ EP_CGSCCOptimizerLate
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
Definition: PassManagerBuilder.h:117
llvm::createAMDGPUCFGStructurizerPass
FunctionPass * createAMDGPUCFGStructurizerPass()
Definition: AMDILCFGStructurizer.cpp:1654
llvm::createR600ISelDag
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a R600-specific.
Definition: AMDGPUISelDAGToDAG.cpp:387
llvm::IfConverterID
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
Definition: IfConversion.cpp:436
llvm::AMDGPUTargetMachine::AMDGPUTargetMachine
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
Definition: AMDGPUTargetMachine.cpp:374
llvm::TargetPassConfig::addFastRegAlloc
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
Definition: TargetPassConfig.cpp:1343
llvm::AMDGPUPerfHintAnalysisID
char & AMDGPUPerfHintAnalysisID
Definition: AMDGPUPerfHintAnalysis.cpp:57
TargetRegistry.h
llvm::createSROAPass
FunctionPass * createSROAPass()
Definition: SROA.cpp:4829
llvm::AMDGPUPropagateAttributesLatePass
Definition: AMDGPU.h:150
EnableLibCallSimplify
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
InitializePasses.h
llvm::yaml::SIMode::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: SIMachineFunctionInfo.h:235
llvm::SIOptimizeExecMaskingPreRAID
char & SIOptimizeExecMaskingPreRAID
Definition: SIOptimizeExecMaskingPreRA.cpp:75
llvm::createGCNMCRegisterInfo
MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)
Definition: AMDGPUMCTargetDesc.cpp:68
llvm::TargetMachine::MRI
std::unique_ptr< const MCRegisterInfo > MRI
Definition: TargetMachine.h:106
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
EnableAMDGPUFixedFunctionABIOpt
static cl::opt< bool, true > EnableAMDGPUFixedFunctionABIOpt("amdgpu-fixed-function-abi", cl::desc("Enable all implicit function arguments"), cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI), cl::init(false), cl::Hidden)
llvm::createGCNRegBankReassignPass
MachineFunctionPass * createGCNRegBankReassignPass(AMDGPU::RegBankReassignMode Mode)
Definition: GCNRegBankReassign.cpp:898
llvm::AMDGPUTargetMachine::EnableLateStructurizeCFG
static bool EnableLateStructurizeCFG
Definition: AMDGPUTargetMachine.h:35
llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks
void registerPassBuilderCallbacks(PassBuilder &PB, bool DebugPassManager) override
Allow the target to modify the pass pipeline with New Pass Manager (similar to adjustPassManager for ...
Definition: AMDGPUTargetMachine.cpp:489
llvm::TargetPassConfig::addILPOpts
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
Definition: TargetPassConfig.h:373
AMDGPUTargetMachine.h
llvm::GCNTargetMachine::createDefaultFuncInfoYAML
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
Definition: AMDGPUTargetMachine.cpp:1230
PassBuilder.h
llvm::PassManager::addPass
std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT Pass)
Definition: PassManager.h:553
PassName
static const char PassName[]
Definition: X86LowerAMXIntrinsics.cpp:666
llvm::initializeSILowerControlFlowPass
void initializeSILowerControlFlowPass(PassRegistry &)
llvm::SILateBranchLoweringPassID
char & SILateBranchLoweringPassID
Definition: SILateBranchLowering.cpp:66
llvm::createAMDGPUSimplifyLibCallsPass
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
Definition: AMDGPULibCalls.cpp:1698
MIParser.h
llvm::Localizer
This pass implements the localization mechanism described at the top of this file.
Definition: Localizer.h:40
llvm::createAMDGPUMacroFusionDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
Definition: AMDGPUMacroFusion.cpp:62
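A minimal sketch of the DAG.addMutation(...) registration pattern the note above refers to, written as a scheduler factory; the factory name is hypothetical, the mutations are the ones listed in this index:
// Hypothetical factory: attach clustering and macro-fusion mutations to a standard live scheduler.
static ScheduleDAGInstrs *createClusteredGCNScheduler(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
  DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
  return DAG;
}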