LLVM  10.0.0svn
AMDGPUTargetMachine.cpp
Go to the documentation of this file.
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for R600 and SI GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUCallLowering.h"
20 #include "AMDGPULegalizerInfo.h"
21 #include "AMDGPUMacroFusion.h"
22 #include "AMDGPUTargetObjectFile.h"
24 #include "GCNIterativeScheduler.h"
25 #include "GCNSchedStrategy.h"
26 #include "R600MachineScheduler.h"
27 #include "SIMachineFunctionInfo.h"
28 #include "SIMachineScheduler.h"
35 #include "llvm/CodeGen/Passes.h"
37 #include "llvm/IR/Attributes.h"
38 #include "llvm/IR/Function.h"
40 #include "llvm/Pass.h"
42 #include "llvm/Support/Compiler.h"
45 #include "llvm/Transforms/IPO.h"
48 #include "llvm/Transforms/Scalar.h"
50 #include "llvm/Transforms/Utils.h"
52 #include <memory>
53 
54 using namespace llvm;
55 
57  "r600-ir-structurize",
58  cl::desc("Use StructurizeCFG IR pass"),
59  cl::init(true));
60 
62  "amdgpu-sroa",
63  cl::desc("Run SROA after promote alloca pass"),
65  cl::init(true));
66 
67 static cl::opt<bool>
68 EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
69  cl::desc("Run early if-conversion"),
70  cl::init(false));
71 
72 static cl::opt<bool>
73 OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
74  cl::desc("Run pre-RA exec mask optimizations"),
75  cl::init(true));
76 
78  "r600-if-convert",
79  cl::desc("Use if conversion pass"),
81  cl::init(true));
82 
83 // Option to disable vectorizer for tests.
85  "amdgpu-load-store-vectorizer",
86  cl::desc("Enable load store vectorizer"),
87  cl::init(true),
88  cl::Hidden);
89 
90 // Option to control global loads scalarization
92  "amdgpu-scalarize-global-loads",
93  cl::desc("Enable global load scalarization"),
94  cl::init(true),
95  cl::Hidden);
96 
97 // Option to run internalize pass.
99  "amdgpu-internalize-symbols",
100  cl::desc("Enable elimination of non-kernel functions and unused globals"),
101  cl::init(false),
102  cl::Hidden);
103 
104 // Option to inline all early.
106  "amdgpu-early-inline-all",
107  cl::desc("Inline all functions early"),
108  cl::init(false),
109  cl::Hidden);
110 
112  "amdgpu-sdwa-peephole",
113  cl::desc("Enable SDWA peepholer"),
114  cl::init(true));
115 
117  "amdgpu-dpp-combine",
118  cl::desc("Enable DPP combiner"),
119  cl::init(true));
120 
121 // Enable address space based alias analysis
122 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
123  cl::desc("Enable AMDGPU Alias Analysis"),
124  cl::init(true));
125 
126 // Option to run late CFG structurizer
128  "amdgpu-late-structurize",
129  cl::desc("Enable late CFG structurization"),
131  cl::Hidden);
132 
134  "amdgpu-function-calls",
135  cl::desc("Enable AMDGPU function call support"),
137  cl::init(true),
138  cl::Hidden);
139 
140 // Enable lib calls simplifications
142  "amdgpu-simplify-libcall",
143  cl::desc("Enable amdgpu library simplifications"),
144  cl::init(true),
145  cl::Hidden);
146 
148  "amdgpu-ir-lower-kernel-arguments",
149  cl::desc("Lower kernel argument loads in IR pass"),
150  cl::init(true),
151  cl::Hidden);
152 
154  "amdgpu-reassign-regs",
155  cl::desc("Enable register reassign optimizations on gfx10+"),
156  cl::init(true),
157  cl::Hidden);
158 
159 // Enable atomic optimization
161  "amdgpu-atomic-optimizations",
162  cl::desc("Enable atomic optimizations"),
163  cl::init(false),
164  cl::Hidden);
165 
166 // Enable Mode register optimization
168  "amdgpu-mode-register",
169  cl::desc("Enable mode register pass"),
170  cl::init(true),
171  cl::Hidden);
172 
173 // Option is used in lit tests to prevent deadcoding of patterns inspected.
174 static cl::opt<bool>
175 EnableDCEInRA("amdgpu-dce-in-ra",
176  cl::init(true), cl::Hidden,
177  cl::desc("Enable machine DCE inside regalloc"));
178 
180  "amdgpu-scalar-ir-passes",
181  cl::desc("Enable scalar IR passes"),
182  cl::init(true),
183  cl::Hidden);
184 
185 extern "C" void LLVMInitializeAMDGPUTarget() {
186  // Register the target
189 
244 }
245 
// Create the TargetLoweringObjectFile for this target machine. The triple
// parameter is unused here: all AMDGPU flavors share the same object-file
// lowering implementation (AMDGPUTargetObjectFile).
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  return std::make_unique<AMDGPUTargetObjectFile>();
}
249 
251  return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
252 }
253 
255  return new SIScheduleDAGMI(C);
256 }
257 
258 static ScheduleDAGInstrs *
260  ScheduleDAGMILive *DAG =
261  new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
265  return DAG;
266 }
267 
268 static ScheduleDAGInstrs *
270  auto DAG = new GCNIterativeScheduler(C,
272  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
273  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
274  return DAG;
275 }
276 
278  return new GCNIterativeScheduler(C,
280 }
281 
282 static ScheduleDAGInstrs *
284  auto DAG = new GCNIterativeScheduler(C,
286  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
287  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
288  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
289  return DAG;
290 }
291 
293 R600SchedRegistry("r600", "Run R600's custom scheduler",
295 
297 SISchedRegistry("si", "Run SI's custom scheduler",
299 
301 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
302  "Run GCN scheduler to maximize occupancy",
304 
306 IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
307  "Run GCN scheduler to maximize occupancy (experimental)",
309 
311 GCNMinRegSchedRegistry("gcn-minreg",
312  "Run GCN iterative scheduler for minimal register usage (experimental)",
314 
316 GCNILPSchedRegistry("gcn-ilp",
317  "Run GCN iterative scheduler for ILP scheduling (experimental)",
319 
321  if (TT.getArch() == Triple::r600) {
322  // 32-bit pointers.
323  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
324  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
325  }
326 
327  // 32-bit private, local, and region pointers. 64-bit global, constant and
328  // flat, non-integral buffer fat pointers.
329  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
330  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
331  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
332  "-ni:7";
333 }
334 
337  if (!GPU.empty())
338  return GPU;
339 
340  // Need to default to a target with flat support for HSA.
341  if (TT.getArch() == Triple::amdgcn)
342  return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
343 
344  return "r600";
345 }
346 
348  // The AMDGPU toolchain only supports generating shared objects, so we
349  // must always use PIC.
350  return Reloc::PIC_;
351 }
352 
354  StringRef CPU, StringRef FS,
355  TargetOptions Options,
358  CodeGenOpt::Level OptLevel)
359  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
360  FS, Options, getEffectiveRelocModel(RM),
361  getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
362  TLOF(createTLOF(getTargetTriple())) {
363  initAsmInfo();
364 }
365 
368 
370 
372  Attribute GPUAttr = F.getFnAttribute("target-cpu");
373  return GPUAttr.hasAttribute(Attribute::None) ?
374  getTargetCPU() : GPUAttr.getValueAsString();
375 }
376 
378  Attribute FSAttr = F.getFnAttribute("target-features");
379 
380  return FSAttr.hasAttribute(Attribute::None) ?
382  FSAttr.getValueAsString();
383 }
384 
385 /// Predicate for Internalize pass.
386 static bool mustPreserveGV(const GlobalValue &GV) {
387  if (const Function *F = dyn_cast<Function>(&GV))
388  return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
389 
390  return !GV.use_empty();
391 }
392 
394  Builder.DivergentTarget = true;
395 
396  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
397  bool Internalize = InternalizeSymbols;
398  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
399  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
400  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
401 
402  if (EnableFunctionCalls) {
403  delete Builder.Inliner;
405  }
406 
407  Builder.addExtension(
409  [Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &,
411  if (AMDGPUAA) {
414  }
418  if (Internalize) {
420  PM.add(createGlobalDCEPass());
421  }
422  if (EarlyInline)
424  });
425 
426  const auto &Opt = Options;
427  Builder.addExtension(
429  [AMDGPUAA, LibCallSimplify, &Opt, this](const PassManagerBuilder &,
431  if (AMDGPUAA) {
434  }
437  if (LibCallSimplify)
439  });
440 
441  Builder.addExtension(
444  // Add infer address spaces pass to the opt pipeline after inlining
445  // but before SROA to increase SROA opportunities.
447 
448  // This should run after inlining to have any chance of doing anything,
449  // and before other cleanup optimizations.
451  });
452 }
453 
454 //===----------------------------------------------------------------------===//
455 // R600 Target Machine (R600 -> Cayman)
456 //===----------------------------------------------------------------------===//
457 
459  StringRef CPU, StringRef FS,
463  CodeGenOpt::Level OL, bool JIT)
464  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
466 
467  // Override the default since calls aren't supported for r600.
468  if (EnableFunctionCalls &&
469  EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
470  EnableFunctionCalls = false;
471 }
472 
474  const Function &F) const {
475  StringRef GPU = getGPUName(F);
476  StringRef FS = getFeatureString(F);
477 
478  SmallString<128> SubtargetKey(GPU);
479  SubtargetKey.append(FS);
480 
481  auto &I = SubtargetMap[SubtargetKey];
482  if (!I) {
483  // This needs to be done before we create a new subtarget since any
484  // creation will depend on the TM and the code generation flags on the
485  // function that reside in TargetOptions.
487  I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
488  }
489 
490  return I.get();
491 }
492 
495  return TargetTransformInfo(R600TTIImpl(this, F));
496 }
497 
498 //===----------------------------------------------------------------------===//
499 // GCN Target Machine (SI+)
500 //===----------------------------------------------------------------------===//
501 
503  StringRef CPU, StringRef FS,
507  CodeGenOpt::Level OL, bool JIT)
508  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
509 
511  StringRef GPU = getGPUName(F);
512  StringRef FS = getFeatureString(F);
513 
514  SmallString<128> SubtargetKey(GPU);
515  SubtargetKey.append(FS);
516 
517  auto &I = SubtargetMap[SubtargetKey];
518  if (!I) {
519  // This needs to be done before we create a new subtarget since any
520  // creation will depend on the TM and the code generation flags on the
521  // function that reside in TargetOptions.
523  I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
524  }
525 
526  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
527 
528  return I.get();
529 }
530 
533  return TargetTransformInfo(GCNTTIImpl(this, F));
534 }
535 
536 //===----------------------------------------------------------------------===//
537 // AMDGPU Pass Setup
538 //===----------------------------------------------------------------------===//
539 
540 namespace {
541 
542 class AMDGPUPassConfig : public TargetPassConfig {
543 public:
544  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
545  : TargetPassConfig(TM, PM) {
546  // Exceptions and StackMaps are not supported, so these passes will never do
547  // anything.
548  disablePass(&StackMapLivenessID);
549  disablePass(&FuncletLayoutID);
550  }
551 
552  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
553  return getTM<AMDGPUTargetMachine>();
554  }
555 
557  createMachineScheduler(MachineSchedContext *C) const override {
561  return DAG;
562  }
563 
564  void addEarlyCSEOrGVNPass();
565  void addStraightLineScalarOptimizationPasses();
566  void addIRPasses() override;
567  void addCodeGenPrepare() override;
568  bool addPreISel() override;
569  bool addInstSelector() override;
570  bool addGCPasses() override;
571 
572  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
573 };
574 
// Use the standard GlobalISel CSE configuration appropriate for the current
// optimization level; no AMDGPU-specific CSE tuning is applied here.
std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig() const {
  return getStandardCSEConfigForOpt(TM->getOptLevel());
}
578 
// Pass configuration for the R600 family of GPUs (R600 -> Cayman). Inherits
// the common AMDGPU pipeline and overrides selection, scheduling, and late
// emission hooks with R600-specific passes.
class R600PassConfig final : public AMDGPUPassConfig {
public:
  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) {}

  // R600 uses its own custom machine scheduler rather than the generic one.
  ScheduleDAGInstrs *createMachineScheduler(
    MachineSchedContext *C) const override {
    return createR600MachineScheduler(C);
  }

  bool addPreISel() override;
  bool addInstSelector() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};
595 
596 class GCNPassConfig final : public AMDGPUPassConfig {
597 public:
598  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
599  : AMDGPUPassConfig(TM, PM) {
600  // It is necessary to know the register usage of the entire call graph. We
601  // allow calls without EnableAMDGPUFunctionCalls if they are marked
602  // noinline, so this is always required.
603  setRequiresCodeGenSCCOrder(true);
604  }
605 
606  GCNTargetMachine &getGCNTargetMachine() const {
607  return getTM<GCNTargetMachine>();
608  }
609 
611  createMachineScheduler(MachineSchedContext *C) const override;
612 
613  bool addPreISel() override;
614  void addMachineSSAOptimization() override;
615  bool addILPOpts() override;
616  bool addInstSelector() override;
617  bool addIRTranslator() override;
618  bool addLegalizeMachineIR() override;
619  bool addRegBankSelect() override;
620  bool addGlobalInstructionSelect() override;
621  void addFastRegAlloc() override;
622  void addOptimizedRegAlloc() override;
623  void addPreRegAlloc() override;
624  bool addPreRewrite() override;
625  void addPostRegAlloc() override;
626  void addPreSched2() override;
627  void addPreEmitPass() override;
628 };
629 
630 } // end anonymous namespace
631 
632 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
634  addPass(createGVNPass());
635  else
636  addPass(createEarlyCSEPass());
637 }
638 
639 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
640  addPass(createLICMPass());
643  // ReassociateGEPs exposes more opportunites for SLSR. See
644  // the example in reassociate-geps-and-slsr.ll.
646  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
647  // EarlyCSE can reuse.
648  addEarlyCSEOrGVNPass();
649  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
650  addPass(createNaryReassociatePass());
651  // NaryReassociate on GEPs creates redundant common expressions, so run
652  // EarlyCSE after it.
653  addPass(createEarlyCSEPass());
654 }
655 
656 void AMDGPUPassConfig::addIRPasses() {
657  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
658 
659  // There is no reason to run these.
660  disablePass(&StackMapLivenessID);
661  disablePass(&FuncletLayoutID);
662  disablePass(&PatchableFunctionID);
663 
665 
666  // This must occur before inlining, as the inliner will not look through
667  // bitcast calls.
669 
670  // A call to propagate attributes pass in the backend in case opt was not run.
672 
673  addPass(createAtomicExpandPass());
674 
675 
677 
678  // Function calls are not supported, so make sure we inline everything.
679  addPass(createAMDGPUAlwaysInlinePass());
681  // We need to add the barrier noop pass, otherwise adding the function
682  // inlining pass will cause all of the PassConfigs passes to be run
683  // one function at a time, which means if we have a module with two
684  // functions, then we will generate code for the first function
685  // without ever running any passes on the second.
686  addPass(createBarrierNoopPass());
687 
688  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
689  // TODO: May want to move later or split into an early and late one.
690 
692  }
693 
694  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
695  if (TM.getTargetTriple().getArch() == Triple::r600)
697 
698  // Replace OpenCL enqueued block function pointers with global variables.
700 
701  if (TM.getOptLevel() > CodeGenOpt::None) {
702  addPass(createInferAddressSpacesPass());
703  addPass(createAMDGPUPromoteAlloca());
704 
705  if (EnableSROA)
706  addPass(createSROAPass());
707 
709  addStraightLineScalarOptimizationPasses();
710 
712  addPass(createAMDGPUAAWrapperPass());
714  AAResults &AAR) {
715  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
716  AAR.addAAResult(WrapperPass->getResult());
717  }));
718  }
719  }
720 
722 
723  // EarlyCSE is not always strong enough to clean up what LSR produces. For
724  // example, GVN can combine
725  //
726  // %0 = add %a, %b
727  // %1 = add %b, %a
728  //
729  // and
730  //
731  // %0 = shl nsw %a, 2
732  // %1 = shl %a, 2
733  //
734  // but EarlyCSE can do neither of them.
736  addEarlyCSEOrGVNPass();
737 }
738 
739 void AMDGPUPassConfig::addCodeGenPrepare() {
740  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
742 
743  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
746 
747  addPass(&AMDGPUPerfHintAnalysisID);
748 
750 
753 }
754 
// Common pre-instruction-selection IR preparation for all AMDGPU targets:
// lower switch instructions to branches and flatten the CFG. Target-specific
// subclasses extend this with their own pre-ISel passes.
bool AMDGPUPassConfig::addPreISel() {
  addPass(createLowerSwitchPass());
  addPass(createFlattenCFGPass());
  return false;
}
760 
// Install the AMDGPU SelectionDAG instruction selector. The `false` argument
// suppresses the immediate verifier run after this pass.
bool AMDGPUPassConfig::addInstSelector() {
  // Defer the verifier until FinalizeISel.
  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()), false);
  return false;
}
766 
// Override the default garbage-collection pass installation with a no-op.
bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}
771 
772 //===----------------------------------------------------------------------===//
773 // R600 Pass Setup
774 //===----------------------------------------------------------------------===//
775 
776 bool R600PassConfig::addPreISel() {
777  AMDGPUPassConfig::addPreISel();
778 
780  addPass(createStructurizeCFGPass());
781  return false;
782 }
783 
// Install the R600-specific SelectionDAG instruction selector.
bool R600PassConfig::addInstSelector() {
  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}
788 
// Run the R600 vector register merger before register allocation.
// (Presumably coalesces vector-element registers into wider vector
// registers — see the R600VectorRegMerger pass itself for details.)
void R600PassConfig::addPreRegAlloc() {
  addPass(createR600VectorRegMerger());
}
792 
793 void R600PassConfig::addPreSched2() {
794  addPass(createR600EmitClauseMarkers(), false);
796  addPass(&IfConverterID, false);
797  addPass(createR600ClauseMergePass(), false);
798 }
799 
// Final R600 lowering before emission. The ordering here is significant:
// structurize the machine CFG, expand special/pseudo instructions, finalize
// instruction bundles, packetize for the VLIW pipeline, then finalize
// control flow. Each pass is added with verification disabled (`false`).
void R600PassConfig::addPreEmitPass() {
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(), false);
  addPass(createR600ControlFlowFinalizer(), false);
}
807 
809  return new R600PassConfig(*this, PM);
810 }
811 
812 //===----------------------------------------------------------------------===//
813 // GCN Pass Setup
814 //===----------------------------------------------------------------------===//
815 
816 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
817  MachineSchedContext *C) const {
818  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
819  if (ST.enableSIScheduler())
820  return createSIMachineScheduler(C);
822 }
823 
824 bool GCNPassConfig::addPreISel() {
825  AMDGPUPassConfig::addPreISel();
826 
829  }
830 
831  // FIXME: We need to run a pass to propagate the attributes when calls are
832  // supported.
833 
834  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
835  // regions formed by them.
837  if (!LateCFGStructurize) {
838  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
839  }
840  addPass(createSinkingPass());
842  if (!LateCFGStructurize) {
844  }
845  addPass(createLCSSAPass());
846 
847  return false;
848 }
849 
850 void GCNPassConfig::addMachineSSAOptimization() {
852 
853  // We want to fold operands after PeepholeOptimizer has run (or as part of
854  // it), because it will eliminate extra copies making it easier to fold the
855  // real source operand. We want to eliminate dead instructions after, so that
856  // we see fewer uses of the copies. We then need to clean up the dead
857  // instructions leftover after the operands are folded as well.
858  //
859  // XXX - Can we get away without running DeadMachineInstructionElim again?
860  addPass(&SIFoldOperandsID);
861  if (EnableDPPCombine)
862  addPass(&GCNDPPCombineID);
864  addPass(&SILoadStoreOptimizerID);
865  if (EnableSDWAPeephole) {
866  addPass(&SIPeepholeSDWAID);
867  addPass(&EarlyMachineLICMID);
868  addPass(&MachineCSEID);
869  addPass(&SIFoldOperandsID);
871  }
873 }
874 
875 bool GCNPassConfig::addILPOpts() {
877  addPass(&EarlyIfConverterID);
878 
880  return false;
881 }
882 
883 bool GCNPassConfig::addInstSelector() {
884  AMDGPUPassConfig::addInstSelector();
885  addPass(&SIFixSGPRCopiesID);
886  addPass(createSILowerI1CopiesPass());
887  addPass(createSIFixupVectorISelPass());
888  addPass(createSIAddIMGInitPass());
889  // FIXME: Remove this once the phi on CF_END is cleaned up by either removing
890  // LCSSA or other ways.
892  return false;
893 }
894 
// GlobalISel stage 1: translate LLVM IR into generic MachineInstrs.
bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}
899 
// GlobalISel stage 2: legalize generic machine IR to target-supported
// operations.
bool GCNPassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}
904 
// GlobalISel stage 3: assign virtual registers to register banks
// (e.g. SGPR vs. VGPR).
bool GCNPassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}
909 
// GlobalISel stage 4: select target instructions from the legalized,
// bank-assigned generic MIR.
bool GCNPassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect());
  return false;
}
914 
915 void GCNPassConfig::addPreRegAlloc() {
916  if (LateCFGStructurize) {
918  }
919  addPass(createSIWholeQuadModePass());
920 }
921 
922 void GCNPassConfig::addFastRegAlloc() {
923  // FIXME: We have to disable the verifier here because of PHIElimination +
924  // TwoAddressInstructions disabling it.
925 
926  // This must be run immediately after phi elimination and before
927  // TwoAddressInstructions, otherwise the processing of the tied operand of
928  // SI_ELSE will introduce a copy of the tied operand source after the else.
929  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
930 
931  // This must be run just after RegisterCoalescing.
932  insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
933 
935 }
936 
937 void GCNPassConfig::addOptimizedRegAlloc() {
938  if (OptExecMaskPreRA) {
941  } else {
943  }
944 
945  // This must be run immediately after phi elimination and before
946  // TwoAddressInstructions, otherwise the processing of the tied operand of
947  // SI_ELSE will introduce a copy of the tied operand source after the else.
948  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
949 
950  // This must be run just after RegisterCoalescing.
951  insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
952 
953  if (EnableDCEInRA)
955 
957 }
958 
// Hook run before virtual registers are rewritten to physical registers.
// When register reassignment is enabled (the "amdgpu-reassign-regs" option,
// described as a gfx10+ optimization), run the NSA and register-bank
// reassignment passes. Returning true keeps the rewriter running afterward.
bool GCNPassConfig::addPreRewrite() {
  if (EnableRegReassign) {
    addPass(&GCNNSAReassignID);
    addPass(&GCNRegBankReassignID);
  }
  return true;
}
966 
967 void GCNPassConfig::addPostRegAlloc() {
968  addPass(&SIFixVGPRCopiesID);
970  addPass(&SIOptimizeExecMaskingID);
972 
973  // Equivalent of PEI for SGPRs.
974  addPass(&SILowerSGPRSpillsID);
975 }
976 
// Intentionally empty: no GCN-specific passes are needed before the second
// (post-RA) scheduling pass.
void GCNPassConfig::addPreSched2() {
}
979 
980 void GCNPassConfig::addPreEmitPass() {
981  addPass(createSIMemoryLegalizerPass());
982  addPass(createSIInsertWaitcntsPass());
984  addPass(createSIModeRegisterPass());
985 
986  // The hazard recognizer that runs as part of the post-ra scheduler does not
987  // guarantee to be able handle all hazards correctly. This is because if there
988  // are multiple scheduling regions in a basic block, the regions are scheduled
989  // bottom up, so when we begin to schedule a region we don't know what
990  // instructions were emitted directly before it.
991  //
992  // Here we add a stand-alone hazard recognizer pass which can handle all
993  // cases.
994  //
995  // FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
996  // be better for it to emit S_NOP <N> when possible.
997  addPass(&PostRAHazardRecognizerID);
998 
999  addPass(&SIInsertSkipsPassID);
1000  addPass(&BranchRelaxationPassID);
1001 }
1002 
1004  return new GCNPassConfig(*this, PM);
1005 }
1006 
1008  return new yaml::SIMachineFunctionInfo();
1009 }
1010 
1014  return new yaml::SIMachineFunctionInfo(*MFI,
1015  *MF.getSubtarget().getRegisterInfo());
1016 }
1017 
1020  SMDiagnostic &Error, SMRange &SourceRange) const {
1021  const yaml::SIMachineFunctionInfo &YamlMFI =
1022  reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
1023  MachineFunction &MF = PFS.MF;
1025 
1026  MFI->initializeBaseYamlFields(YamlMFI);
1027 
1028  auto parseRegister = [&](const yaml::StringValue &RegName, unsigned &RegVal) {
1029  if (parseNamedRegisterReference(PFS, RegVal, RegName.Value, Error)) {
1030  SourceRange = RegName.SourceRange;
1031  return true;
1032  }
1033 
1034  return false;
1035  };
1036 
1037  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
1038  // Create a diagnostic for the register string literal.
1039  const MemoryBuffer &Buffer =
1040  *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
1041  Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1042  RegName.Value.size(), SourceMgr::DK_Error,
1043  "incorrect register class for field", RegName.Value,
1044  None, None);
1045  SourceRange = RegName.SourceRange;
1046  return true;
1047  };
1048 
1049  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1050  parseRegister(YamlMFI.ScratchWaveOffsetReg, MFI->ScratchWaveOffsetReg) ||
1051  parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1052  parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1053  return true;
1054 
1055  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1056  !AMDGPU::SReg_128RegClass.contains(MFI->ScratchRSrcReg)) {
1057  return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1058  }
1059 
1060  if (MFI->ScratchWaveOffsetReg != AMDGPU::SCRATCH_WAVE_OFFSET_REG &&
1061  !AMDGPU::SGPR_32RegClass.contains(MFI->ScratchWaveOffsetReg)) {
1062  return diagnoseRegisterClass(YamlMFI.ScratchWaveOffsetReg);
1063  }
1064 
1065  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1066  !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1067  return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1068  }
1069 
1070  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1071  !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1072  return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1073  }
1074 
1075  auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
1076  const TargetRegisterClass &RC,
1077  ArgDescriptor &Arg, unsigned UserSGPRs,
1078  unsigned SystemSGPRs) {
1079  // Skip parsing if it's not present.
1080  if (!A)
1081  return false;
1082 
1083  if (A->IsRegister) {
1084  unsigned Reg;
1085  if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
1086  SourceRange = A->RegisterName.SourceRange;
1087  return true;
1088  }
1089  if (!RC.contains(Reg))
1090  return diagnoseRegisterClass(A->RegisterName);
1091  Arg = ArgDescriptor::createRegister(Reg);
1092  } else
1094  // Check and apply the optional mask.
1095  if (A->Mask)
1096  Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
1097 
1098  MFI->NumUserSGPRs += UserSGPRs;
1099  MFI->NumSystemSGPRs += SystemSGPRs;
1100  return false;
1101  };
1102 
1103  if (YamlMFI.ArgInfo &&
1104  (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
1105  AMDGPU::SReg_128RegClass,
1106  MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
1107  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
1108  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
1109  2, 0) ||
1110  parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1111  MFI->ArgInfo.QueuePtr, 2, 0) ||
1112  parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
1113  AMDGPU::SReg_64RegClass,
1114  MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
1115  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
1116  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
1117  2, 0) ||
1118  parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
1119  AMDGPU::SReg_64RegClass,
1120  MFI->ArgInfo.FlatScratchInit, 2, 0) ||
1121  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
1122  AMDGPU::SGPR_32RegClass,
1123  MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
1124  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
1125  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
1126  0, 1) ||
1127  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
1128  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
1129  0, 1) ||
1130  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
1131  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
1132  0, 1) ||
1133  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
1134  AMDGPU::SGPR_32RegClass,
1135  MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
1136  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
1137  AMDGPU::SGPR_32RegClass,
1138  MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
1139  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
1140  AMDGPU::SReg_64RegClass,
1141  MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
1142  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
1143  AMDGPU::SReg_64RegClass,
1144  MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
1145  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
1146  AMDGPU::VGPR_32RegClass,
1147  MFI->ArgInfo.WorkItemIDX, 0, 0) ||
1148  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
1149  AMDGPU::VGPR_32RegClass,
1150  MFI->ArgInfo.WorkItemIDY, 0, 0) ||
1151  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
1152  AMDGPU::VGPR_32RegClass,
1153  MFI->ArgInfo.WorkItemIDZ, 0, 0)))
1154  return true;
1155 
1156  MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
1157  MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
1158 
1159  return false;
1160 }
FunctionPass * createSpeculativeExecutionPass()
char & SIFormMemoryClausesID
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
FunctionPass * createStraightLineStrengthReducePass()
uint64_t CallInst * C
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:69
Represents a range in source code.
Definition: SMLoc.h:48
FunctionPass * createGVNPass(bool NoLoads=false)
Create a legacy GVN pass.
Definition: GVN.cpp:2602
StringRef getTargetFeatureString() const
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
Target & getTheGCNTarget()
The target for GCN GPUs.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
This file provides a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine...
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value...
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
char & RenameIndependentSubregsID
This pass detects subregister lanes in a virtual register that are used independently of other lanes ...
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void initializeSIFixVGPRCopiesPass(PassRegistry &)
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a R600-specific.
void initializeSIInsertWaitcntsPass(PassRegistry &)
char & GCNNSAReassignID
void initializeSIFormMemoryClausesPass(PassRegistry &)
ModulePass * createR600OpenCLImageTypeLoweringPass()
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with...
Definition: TargetMachine.h:84
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:305
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
char & SILoadStoreOptimizerID
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
char & SIPeepholeSDWAID
void initializeSIModeRegisterPass(PassRegistry &)
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
This file describes how to lower LLVM calls to machine code calls.
char & FuncletLayoutID
This pass lays out funclets contiguously.
unsigned Reg
FunctionPass * createLowerSwitchPass()
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
char & RegisterCoalescerID
RegisterCoalescer - This pass merges live ranges to eliminate copies.
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions...
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
Pass * createLCSSAPass()
Definition: LCSSA.cpp:466
FunctionPass * createAMDGPUPromoteAlloca()
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
ModulePass * createAMDGPULowerKernelAttributesPass()
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target&#39;s MachineFunctionInfo from the YAML representation.
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
FunctionPass * createAMDGPUCodeGenPreparePass()
F(f)
R600 Machine Scheduler interface.
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
FunctionPass * createAMDGPUCFGStructurizerPass()
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static cl::opt< bool, true > EnableAMDGPUFunctionCallsOpt("amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true), cl::Hidden)
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form...
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
FunctionPass * createSIAddIMGInitPass()
FunctionPass * createSIMemoryLegalizerPass()
Pass * Inliner
Inliner - Specifies the inliner to use.
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
FunctionPass * createSIInsertWaitcntsPass()
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
StringRef getFeatureString(const Function &F) const
ModulePass * createAMDGPUPrintfRuntimeBinding()
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static ArgDescriptor createStack(Register Reg, unsigned Mask=~0u)
void resetTargetOptions(const Function &F) const
Reset the target options based on the function&#39;s attributes.
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
This file declares the targeting of the InstructionSelector class for AMDGPU.
Pass * createAMDGPUFunctionInliningPass()
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
This file declares the AMDGPU-specific subclass of TargetLoweringObjectFile.
Pass * createAMDGPUAnnotateKernelFeaturesPass()
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
This file contains the simple types necessary to represent the attributes associated with functions a...
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
No attributes have been set.
Definition: Attributes.h:72
void initializeGCNNSAReassignPass(PassRegistry &)
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
void initializeAMDGPUInlinerPass(PassRegistry &)
FunctionPass * createSinkingPass()
Definition: Sink.cpp:303
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
char & SIOptimizeExecMaskingPreRAID
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalize machine instruction bundles (created earlier, e.g.
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
Target-Independent Code Generator Pass Configuration Options.
static StringRef computeDataLayout(const Triple &TT)
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
char & UnreachableMachineBlockElimID
UnreachableMachineBlockElimination - This pass removes unreachable machine basic blocks.
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:132
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
FunctionPass * createR600ExpandSpecialInstrsPass()
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:151
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI)
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:296
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
FunctionPass * createSIFixupVectorISelPass()
static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
FunctionPass * createSILowerI1CopiesPass()
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
StringRef getTargetCPU() const
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
void initializeSIFixSGPRCopiesPass(PassRegistry &)
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
FunctionPass * createR600VectorRegMerger()
void initializeSIFixupVectorISelPass(PassRegistry &)
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
Optional< SIArgumentInfo > ArgInfo
SI Machine Scheduler interface.
StringRef getGPUName(const Function &F) const
unsigned getMainFileID() const
Definition: SourceMgr.h:141
void append(in_iter S, in_iter E)
Append from an iterator pair.
Definition: SmallString.h:74
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions...
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
#define P(N)
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
Definition: CSEInfo.cpp:66
char & GCNDPPCombineID
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
FunctionPass * createAMDGPULowerKernelArgumentsPass()
bool hasAttribute(AttrKind Val) const
Return true if the attribute is present.
Definition: Attributes.cpp:238
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition: RegBankSelect.h:90
FunctionPass * createFlattenCFGPass()
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
FunctionPass * createSIWholeQuadModePass()
This file provides the interface for LLVM&#39;s Global Value Numbering pass which eliminates fully redund...
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
char & SIInsertSkipsPassID
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g.
bool isEntryFunctionCC(CallingConv::ID CC)
void LLVMInitializeAMDGPUTarget()
void initializeSIPeepholeSDWAPass(PassRegistry &)
Pass * createLICMPass()
Definition: LICM.cpp:313
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg, StringRef Src, SMDiagnostic &Error)
Definition: MIParser.cpp:3066
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
char & AMDGPUPerfHintAnalysisID
FunctionPass * createR600ControlFlowFinalizer()
Legacy wrapper pass to provide the AMDGPUAAResult object.
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise...
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
const Triple & getTargetTriple() const
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
void initializeSILowerControlFlowPass(PassRegistry &)
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
ModulePass * createAMDGPULowerIntrinsicsPass()
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
FunctionPass * createSIModeRegisterPass()
const TargetSubtargetInfo * getSubtargetImpl() const
FunctionPass * createR600ClauseMergePass()
The AMDGPU TargetMachine interface definition for hw codegen targets.
static cl::opt< bool > EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
void initializeSIShrinkInstructionsPass(PassRegistry &)
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
Analysis pass providing a never-invalidated alias analysis result.
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations, allowing them to see the code as it is coming out of the frontend.
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
void initializeSIInsertSkipsPass(PassRegistry &)
void initializeR600PacketizerPass(PassRegistry &)
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
FunctionPass * createAMDGPUAnnotateUniformValues()
This is the AMDGPU address space based alias analysis pass.
Provides passes to inlining "always_inline" functions.
char & SIOptimizeExecMaskingID
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
void initializeGCNRegBankReassignPass(PassRegistry &)
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &)
char & AMDGPUUnifyDivergentExitNodesID
bool enableSIScheduler() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
void initializeSIMemoryLegalizerPass(PassRegistry &)
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module, internalizing all globals (functions and variables) it can.
char & SIPreAllocateWWMRegsID
Optional< unsigned > Mask
void initializeSIWholeQuadModePass(PassRegistry &)
void setRequiresStructuredCFG(bool Value)
FunctionPass * createAMDGPUAtomicOptimizerPass()
void initializeR600VectorRegMergerPass(PassRegistry &)
char & SIFixVGPRCopiesID
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
void initializeGCNDPPCombinePass(PassRegistry &)
ImmutablePass * createAMDGPUAAWrapperPass()
FunctionPass * createR600EmitClauseMarkers()
void initializeR600ClauseMergePassPass(PassRegistry &)
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:41
This pass is responsible for selecting generic machine instructions to target-specific instructions...
ModulePass * createAMDGPUFixFunctionBitcastsPass()
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Target - Wrapper for Target specific information.
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
This file declares the targeting of the Machinelegalizer class for AMDGPU.
A wrapper around std::string which contains a source range that&#39;s being set during parsing...
char & SILowerSGPRSpillsID
FunctionPass * createR600Packetizer()
void initializeSILoadStoreOptimizerPass(PassRegistry &)
char & SILowerControlFlowID
ModulePass * createAMDGPUUnifyMetadataPass()
void initializeSIAnnotateControlFlowPass(PassRegistry &)
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
A ScheduleDAG for scheduling lists of MachineInstr.
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
void initializeSIFoldOperandsPass(PassRegistry &)
char & SIFoldOperandsID
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
FunctionPass * createSIShrinkInstructionsPass()
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:223
TargetOptions Options
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
#define LLVM_READNONE
Definition: Compiler.h:184
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
char & SIFixSGPRCopiesID
#define I(x, y, z)
Definition: MD5.cpp:58
void initializeSILowerSGPRSpillsPass(PassRegistry &)
FunctionPass * createSROAPass()
Definition: SROA.cpp:4641
static MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler)
ImmutablePass * createAMDGPUExternalAAWrapperPass()
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
char & GCNRegBankReassignID
This file declares the IRTranslator pass.
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetOptions &, const TargetMachine *)
FunctionPass * createAMDGPUUseNativeCallsPass()
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
char & PostRAHazardRecognizerID
createPostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1410
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
void initializeSILowerI1CopiesPass(PassRegistry &)
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
void addExtension(ExtensionPointTy Ty, ExtensionFn Fn)
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
Represents a location in source code.
Definition: SMLoc.h:23
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...
FunctionPass * createAtomicExpandPass()
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
Definition: GlobalISel.cpp:18
bool use_empty() const
Definition: Value.h:342
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:448
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
FunctionPass * createNaryReassociatePass()
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:261