LLVM  14.0.0git
AMDGPUAnnotateUniformValues.cpp
Go to the documentation of this file.
1 //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass adds amdgpu.uniform metadata to IR values so this information
11 /// can be used during instruction selection.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/IR/InstVisitor.h"
20 #include "llvm/InitializePasses.h"
21 
22 #define DEBUG_TYPE "amdgpu-annotate-uniform"
23 
24 using namespace llvm;
25 
26 namespace {
27 
/// Function pass that is also an instruction visitor over itself. Per the file
/// header, its job is to attach amdgpu.uniform metadata to IR values so the
/// information can be used during instruction selection; the visit hooks
/// declared below cover branch and load instructions.
28 class AMDGPUAnnotateUniformValues : public FunctionPass,
29  public InstVisitor<AMDGPUAnnotateUniformValues> {
   // NOTE(review): the methods of this class also use members named `DA` and
   // `noClobberClones`; their declarations are not visible here -- confirm.

   // MemorySSA for the function being processed; assigned in runOnFunction()
   // and queried by isClobberedInFunction().
31  MemorySSA *MSSA;
   // Result of AMDGPU::isEntryFunctionCC() for the current function's calling
   // convention; assigned in runOnFunction() and read by visitLoadInst().
33  bool isEntryFunc;
34 
35 public:
   // Pass identifier handed to the FunctionPass constructor.
36  static char ID;
37  AMDGPUAnnotateUniformValues() :
38  FunctionPass(ID) { }
39  bool doInitialization(Module &M) override;
40  bool runOnFunction(Function &F) override;
   /// Human-readable pass name.
41  StringRef getPassName() const override {
42  return "AMDGPU Annotate Uniform Values";
43  }
   /// Declares that this pass preserves all analyses.
   // NOTE(review): runOnFunction() fetches LegacyDivergenceAnalysis and
   // MemorySSAWrapperPass through getAnalysis<>(), but no matching
   // addRequired<>() call is visible in this body -- confirm.
44  void getAnalysisUsage(AnalysisUsage &AU) const override {
47  AU.setPreservesAll();
48  }
49 
   // Visitor hooks for branch and load instructions.
50  void visitBranchInst(BranchInst &I);
51  void visitLoadInst(LoadInst &I);
   /// Returns true when MemorySSA's clobbering access for \p Load is anything
   /// other than the live-on-entry definition.
52  bool isClobberedInFunction(LoadInst * Load);
53 };
54 
55 } // End anonymous namespace
56 
// Pass registration. The pass is registered under DEBUG_TYPE
// ("amdgpu-annotate-uniform") with the description "Add AMDGPU uniform
// metadata". Both trailing `false` arguments are passed for the macros' final
// two parameters.
// NOTE(review): no INITIALIZE_PASS_DEPENDENCY entries are visible between
// BEGIN and END even though runOnFunction() queries two analyses -- confirm.
57 INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
58  "Add AMDGPU uniform metadata", false, false)
61 INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
62  "Add AMDGPU uniform metadata", false, false)
63 
// Definition of the static pass identifier declared in the class.
64 char AMDGPUAnnotateUniformValues::ID = 0;
65 
// Attaches an "amdgpu.uniform" metadata node with no operands to I, created in
// I's own LLVMContext.
// NOTE(review): the function header that opens this body is not visible here.
67  I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
68 }
// Attaches an "amdgpu.noclobber" metadata node with no operands to I, created
// in I's own LLVMContext.
// NOTE(review): the function header that opens this body is not visible here.
70  I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
71 }
72 
/// Asks the MemorySSA walker for the access that clobbers \p Load.
/// \returns false when that access is the live-on-entry definition, true for
/// any other clobbering access.
73 bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
74  const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(Load);
   // Anything other than live-on-entry is reported as a clobber.
75  return !MSSA->isLiveOnEntryDef(MA);
76 }
77 
/// Visitor hook for branch instructions: checks whether the divergence
/// analysis held in `DA` reports the branch as uniform.
// NOTE(review): the statement guarded by the `if` is not visible here; from
// the pass description it presumably marks the branch uniform -- confirm.
78 void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
79  if (DA->isUniform(&I))
81 }
82 
/// Visitor hook for load instructions. Loads whose pointer operand is not
/// uniform are ignored. Otherwise the instruction that produces the pointer
/// (when there is one) is tagged with uniform metadata and, in entry functions
/// only, additionally with no-clobber metadata when the load is from the global
/// address space and isClobberedInFunction() returns false. When the pointer is
/// an Argument or GlobalValue and the load qualifies as not clobbered, the
/// load's pointer operand is replaced with a GEP instruction so that there is
/// an instruction to carry the metadata.
83 void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
84  Value *Ptr = I.getPointerOperand();
   // Nothing to annotate when the address itself is not uniform.
85  if (!DA->isUniform(Ptr))
86  return;
   // Predicate: does the load read from the global address space?
87  auto isGlobalLoad = [&](LoadInst &Load)->bool {
88  return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
89  };
90  // We're tracking up to the Function boundaries, and cannot go beyond because
91  // of FunctionPass restrictions. We can ensure that memory is not clobbered
92  // for memory operations that are live in to entry points only.
   // Null when the pointer is not produced by an instruction.
93  Instruction *PtrI = dyn_cast<Instruction>(Ptr);
94 
   // Outside entry functions only the uniform tag is applied; the no-clobber
   // analysis below is skipped entirely.
95  if (!isEntryFunc) {
96  if (PtrI)
97  setUniformMetadata(PtrI);
98  return;
99  }
100 
101  bool NotClobbered = false;
102  bool GlobalLoad = isGlobalLoad(I);
   // The && short-circuits, so MemorySSA is only queried for global loads.
103  if (PtrI)
104  NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
105  else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
106  if (GlobalLoad && !isClobberedInFunction(&I)) {
107  NotClobbered = true;
108  // Lookup for the existing GEP
   // NOTE(review): no visible statement inserts into noClobberClones (it is
   // only queried here and cleared in runOnFunction), so this lookup appears
   // never to hit -- confirm whether an insertion is missing.
109  if (noClobberClones.count(Ptr)) {
110  PtrI = noClobberClones[Ptr];
111  } else {
112  // Create GEP of the Value
113  Function *F = I.getParent()->getParent();
   // NOTE(review): the start of the statement that defines `Idx` is not
   // visible; the visible arguments pair a 32-bit integer type with a 64-bit
   // wide APInt zero -- confirm the widths are intended.
115  Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
116  // Insert GEP at the entry to make it dominate all uses
117  PtrI = GetElementPtrInst::Create(I.getType(), Ptr,
118  ArrayRef<Value *>(Idx), Twine(""),
119  F->getEntryBlock().getFirstNonPHI());
120  }
   // Redirect only this load's pointer operand to the GEP.
121  I.replaceUsesOfWith(Ptr, PtrI);
122  }
123  }
124 
   // PtrI is still null for pointers that are neither instructions nor a
   // qualifying Argument/GlobalValue; those get no metadata.
125  if (PtrI) {
126  setUniformMetadata(PtrI);
127  if (NotClobbered)
128  setNoClobberMetadata(PtrI);
129  }
130 }
131 
/// Module-level initialization hook; performs no work and always returns false.
132 bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
133  return false;
134 }
135 
// Per-function entry point of the pass.
// NOTE(review): the function header that opens this body is not visible here;
// the class declares `bool runOnFunction(Function &F) override`.
   // Bail out without touching F when skipFunction() says so.
137  if (skipFunction(F))
138  return false;
139 
   // Cache the analyses and the entry-function flag read by the visit hooks.
140  DA = &getAnalysis<LegacyDivergenceAnalysis>();
141  MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
142  isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
143 
   // Run the InstVisitor traversal over F, then empty noClobberClones so its
   // entries do not carry over to the next function.
144  visit(F);
145  noClobberClones.clear();
   // Returns true unconditionally once the function was not skipped.
146  return true;
147 }
148 
// Factory: returns a newly heap-allocated AMDGPUAnnotateUniformValues pass.
// NOTE(review): the line carrying the function name and parameter list is not
// visible here.
149 FunctionPass *
151  return new AMDGPUAnnotateUniformValues();
152 }
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Function
Definition: Function.h:62
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1233
F
#define F(x, y, z)
Definition: MD5.cpp:56
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, "Add AMDGPU uniform metadata", false, false) INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:981
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::createAMDGPUAnnotateUniformValues
FunctionPass * createAMDGPUAnnotateUniformValues()
Definition: AMDGPUAnnotateUniformValues.cpp:150
false
Definition: StackSlotColoring.cpp:142
metadata
Add AMDGPU uniform metadata
Definition: AMDGPUAnnotateUniformValues.cpp:62
llvm::Instruction
Definition: Instruction.h:45
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1383
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::uniform
T uniform(GenT &Gen, T Min, T Max)
Return a uniformly distributed random value between Min and Max.
Definition: Random.h:21
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::DenseMap
Definition: DenseMap.h:714
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:705
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:954
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
AMDGPU.h
InstVisitor.h
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:79
llvm::MemoryAccess
Definition: MemorySSA.h:137
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::M68kBeads::DA
@ DA
Definition: M68kBaseInfo.h:59
setUniformMetadata
static void setUniformMetadata(Instruction *I)
Definition: AMDGPUAnnotateUniformValues.cpp:66
MemorySSA.h
llvm::Constant::getIntegerValue
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:388
LegacyDivergenceAnalysis.h
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3101
InitializePasses.h
setNoClobberMetadata
static void setNoClobberMetadata(Instruction *I)
Definition: AMDGPUAnnotateUniformValues.cpp:69
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUAnnotateUniformValues.cpp:22
AMDGPUBaseInfo.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38