LLVM  13.0.0git
AMDGPUPropagateAttributes.cpp
Go to the documentation of this file.
1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This pass propagates attributes from kernels to the non-entry
11 /// functions. Most of the library functions were not compiled for specific ABI,
12 /// yet will be correctly compiled if proper attributes are propagated from the
13 /// caller.
14 ///
15 /// The pass analyzes call graph and propagates ABI target features through the
16 /// call graph.
17 ///
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is cloned all call sites will
21 /// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
31 #include "Utils/AMDGPUBaseInfo.h"
32 #include "llvm/ADT/SmallSet.h"
35 #include "llvm/IR/InstrTypes.h"
38 
39 #define DEBUG_TYPE "amdgpu-propagate-attributes"
40 
41 using namespace llvm;
42 
43 namespace llvm {
// Table of subtarget feature key/value entries, defined outside this file.
// getFeatureString() walks it to turn feature bits into "+name"/"-name"
// entries of a "target-features" attribute string.
44 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
45 }
46 
47 namespace {
48 
49 // Target features to propagate.
50 static constexpr const FeatureBitset TargetFeatures = {
51  AMDGPU::FeatureWavefrontSize16,
52  AMDGPU::FeatureWavefrontSize32,
53  AMDGPU::FeatureWavefrontSize64
54 };
55 
// Names of the string function attributes that are propagated alongside the
// target features.
// TODO: Support conservative min/max merging instead of cloning.
static constexpr const char *AttributeNames[] = {
    "amdgpu-waves-per-eu",
    "amdgpu-flat-work-group-size",
};

// Number of entries in AttributeNames.
static constexpr unsigned NumAttr =
    sizeof(AttributeNames) / sizeof(*AttributeNames);
65 
66 class AMDGPUPropagateAttributes {
67 
68  class FnProperties {
69  private:
70  explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
71 
72  public:
73  explicit FnProperties(const TargetMachine &TM, const Function &F) {
74  Features = TM.getSubtargetImpl(F)->getFeatureBits();
75 
76  for (unsigned I = 0; I < NumAttr; ++I)
77  if (F.hasFnAttribute(AttributeNames[I]))
78  Attributes[I] = F.getFnAttribute(AttributeNames[I]);
79  }
80 
81  bool operator == (const FnProperties &Other) const {
82  if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
83  return false;
84  for (unsigned I = 0; I < NumAttr; ++I)
85  if (Attributes[I] != Other.Attributes[I])
86  return false;
87  return true;
88  }
89 
90  FnProperties adjustToCaller(const FnProperties &CallerProps) const {
91  FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
92  for (unsigned I = 0; I < NumAttr; ++I)
93  New.Attributes[I] = CallerProps.Attributes[I];
94  return New;
95  }
96 
97  FeatureBitset Features;
98  Optional<Attribute> Attributes[NumAttr];
99  };
100 
101  class Clone {
102  public:
103  Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
104  Properties(Props), OrigF(OrigF), NewF(NewF) {}
105 
106  FnProperties Properties;
107  Function *OrigF;
108  Function *NewF;
109  };
110 
111  const TargetMachine *TM;
112 
113  // Clone functions as needed or just set attributes.
114  bool AllowClone;
115 
116  // Option propagation roots.
118 
119  // Clones of functions with their attributes.
120  SmallVector<Clone, 32> Clones;
121 
122  // Find a clone with required features.
123  Function *findFunction(const FnProperties &PropsNeeded,
124  Function *OrigF);
125 
126  // Clone function \p F and set \p NewProps on the clone.
127  // Cole takes the name of original function.
128  Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
129 
130  // Set new function's features in place.
131  void setFeatures(Function &F, const FeatureBitset &NewFeatures);
132 
133  // Set new function's attributes in place.
134  void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
135 
136  std::string getFeatureString(const FeatureBitset &Features) const;
137 
138  // Propagate attributes from Roots.
139  bool process();
140 
141 public:
142  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
143  TM(TM), AllowClone(AllowClone) {}
144 
145  // Use F as a root and propagate its attributes.
146  bool process(Function &F);
147 
148  // Propagate attributes starting from kernel functions.
149  bool process(Module &M);
150 };
151 
152 // Allows to propagate attributes early, but no clonning is allowed as it must
153 // be a function pass to run before any optimizations.
154 // TODO: We shall only need a one instance of module pass, but that needs to be
155 // in the linker pipeline which is currently not possible.
156 class AMDGPUPropagateAttributesEarly : public FunctionPass {
157  const TargetMachine *TM;
158 
159 public:
160  static char ID; // Pass identification
161 
162  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
163  FunctionPass(ID), TM(TM) {
166  }
167 
168  bool runOnFunction(Function &F) override;
169 };
170 
171 // Allows to propagate attributes with clonning but does that late in the
172 // pipeline.
173 class AMDGPUPropagateAttributesLate : public ModulePass {
174  const TargetMachine *TM;
175 
176 public:
177  static char ID; // Pass identification
178 
179  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
180  ModulePass(ID), TM(TM) {
183  }
184 
185  bool runOnModule(Module &M) override;
186 };
187 
188 } // end anonymous namespace.
189 
192 
193 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
194  "amdgpu-propagate-attributes-early",
195  "Early propagate attributes from kernels to functions",
196  false, false)
197 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
199  "Late propagate attributes from kernels to functions",
201 
202 Function *
203 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
204  Function *OrigF) {
205  // TODO: search for clone's clones.
206  for (Clone &C : Clones)
207  if (C.OrigF == OrigF && PropsNeeded == C.Properties)
208  return C.NewF;
209 
210  return nullptr;
211 }
212 
213 bool AMDGPUPropagateAttributes::process(Module &M) {
214  for (auto &F : M.functions())
215  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
216  Roots.insert(&F);
217 
218  return process();
219 }
220 
221 bool AMDGPUPropagateAttributes::process(Function &F) {
222  Roots.insert(&F);
223  return process();
224 }
225 
226 bool AMDGPUPropagateAttributes::process() {
227  bool Changed = false;
228  SmallSet<Function *, 32> NewRoots;
229  SmallSet<Function *, 32> Replaced;
230 
231  if (Roots.empty())
232  return false;
233  Module &M = *(*Roots.begin())->getParent();
234 
235  do {
236  Roots.insert(NewRoots.begin(), NewRoots.end());
237  NewRoots.clear();
238 
239  for (auto &F : M.functions()) {
240  if (F.isDeclaration())
241  continue;
242 
243  const FnProperties CalleeProps(*TM, F);
245  SmallSet<CallBase *, 32> Visited;
246 
247  for (User *U : F.users()) {
248  Instruction *I = dyn_cast<Instruction>(U);
249  if (!I)
250  continue;
251  CallBase *CI = dyn_cast<CallBase>(I);
252  if (!CI)
253  continue;
254  Function *Caller = CI->getCaller();
255  if (!Caller || !Visited.insert(CI).second)
256  continue;
257  if (!Roots.count(Caller) && !NewRoots.count(Caller))
258  continue;
259 
260  const FnProperties CallerProps(*TM, *Caller);
261 
262  if (CalleeProps == CallerProps) {
263  if (!Roots.count(&F))
264  NewRoots.insert(&F);
265  continue;
266  }
267 
268  Function *NewF = findFunction(CallerProps, &F);
269  if (!NewF) {
270  const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
271  if (!AllowClone) {
272  // This may set different features on different iteartions if
273  // there is a contradiction in callers' attributes. In this case
274  // we rely on a second pass running on Module, which is allowed
275  // to clone.
276  setFeatures(F, NewProps.Features);
277  setAttributes(F, NewProps.Attributes);
278  NewRoots.insert(&F);
279  Changed = true;
280  break;
281  }
282 
283  NewF = cloneWithProperties(F, NewProps);
284  Clones.push_back(Clone(CallerProps, &F, NewF));
285  NewRoots.insert(NewF);
286  }
287 
288  ToReplace.push_back(std::make_pair(CI, NewF));
289  Replaced.insert(&F);
290 
291  Changed = true;
292  }
293 
294  while (!ToReplace.empty()) {
295  auto R = ToReplace.pop_back_val();
296  R.first->setCalledFunction(R.second);
297  }
298  }
299  } while (!NewRoots.empty());
300 
301  for (Function *F : Replaced) {
302  if (F->use_empty())
303  F->eraseFromParent();
304  }
305 
306  Roots.clear();
307  Clones.clear();
308 
309  return Changed;
310 }
311 
312 Function *
313 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
314  const FnProperties &NewProps) {
315  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
316 
317  ValueToValueMapTy dummy;
318  Function *NewF = CloneFunction(&F, dummy);
319  setFeatures(*NewF, NewProps.Features);
320  setAttributes(*NewF, NewProps.Attributes);
323 
324  // Swap names. If that is the only clone it will retain the name of now
325  // dead value. Preserve original name for externally visible functions.
326  if (F.hasName() && F.hasLocalLinkage()) {
327  std::string NewName = std::string(NewF->getName());
328  NewF->takeName(&F);
329  F.setName(NewName);
330  }
331 
332  return NewF;
333 }
334 
335 void AMDGPUPropagateAttributes::setFeatures(Function &F,
336  const FeatureBitset &NewFeatures) {
337  std::string NewFeatureStr = getFeatureString(NewFeatures);
338 
339  LLVM_DEBUG(dbgs() << "Set features "
340  << getFeatureString(NewFeatures & TargetFeatures)
341  << " on " << F.getName() << '\n');
342 
343  F.removeFnAttr("target-features");
344  F.addFnAttr("target-features", NewFeatureStr);
345 }
346 
347 void AMDGPUPropagateAttributes::setAttributes(Function &F,
348  const ArrayRef<Optional<Attribute>> NewAttrs) {
349  LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
350  for (unsigned I = 0; I < NumAttr; ++I) {
351  F.removeFnAttr(AttributeNames[I]);
352  if (NewAttrs[I]) {
353  LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
354  F.addFnAttr(*NewAttrs[I]);
355  }
356  }
357 }
358 
359 std::string
360 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
361 {
362  std::string Ret;
363  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
364  if (Features[KV.Value])
365  Ret += (StringRef("+") + KV.Key + ",").str();
366  else if (TargetFeatures[KV.Value])
367  Ret += (StringRef("-") + KV.Key + ",").str();
368  }
369  Ret.pop_back(); // Remove last comma.
370  return Ret;
371 }
372 
374  if (!TM) {
375  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
376  if (!TPC)
377  return false;
378 
379  TM = &TPC->getTM<TargetMachine>();
380  }
381 
382  if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
383  return false;
384 
385  return AMDGPUPropagateAttributes(TM, false).process(F);
386 }
387 
388 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
389  if (!TM) {
390  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
391  if (!TPC)
392  return false;
393 
394  TM = &TPC->getTM<TargetMachine>();
395  }
396 
397  return AMDGPUPropagateAttributes(TM, true).process(M);
398 }
399 
402  return new AMDGPUPropagateAttributesEarly(TM);
403 }
404 
407  return new AMDGPUPropagateAttributesLate(TM);
408 }
409 
413  if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
414  return PreservedAnalyses::all();
415 
416  return AMDGPUPropagateAttributes(&TM, false).process(F)
419 }
420 
423  return AMDGPUPropagateAttributes(&TM, true).process(M)
426 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
functions
amdgpu propagate attributes Late propagate attributes from kernels to functions
Definition: AMDGPUPropagateAttributes.cpp:199
llvm::AMDGPUPropagateAttributesEarlyPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPUPropagateAttributes.cpp:411
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::Function
Definition: Function.h:61
llvm::initializeAMDGPUPropagateAttributesLatePass
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
to
Should compile to
Definition: README.txt:449
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::AMDGPUFeatureKV
const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]
llvm::Optional
Definition: APInt.h:33
llvm::GlobalValue::DefaultVisibility
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:63
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
attributes
Deduce and propagate attributes
Definition: Attributor.cpp:2603
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::initializeAMDGPUPropagateAttributesEarlyPass
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:133
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
InstrTypes.h
false
Definition: StackSlotColoring.cpp:142
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:45
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, "amdgpu-propagate-attributes-early", "Early propagate attributes from kernels to functions", false, false) INITIALIZE_PASS(AMDGPUPropagateAttributesLate
llvm::createAMDGPUPropagateAttributesLatePass
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:406
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::SubtargetFeatureKV
Used to provide key value pairs for feature and CPU bit flags.
Definition: MCSubtargetInfo.h:34
llvm::CallBase::getCaller
Function * getCaller()
Helper to get the caller (the parent function).
Definition: Instructions.cpp:278
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1336
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
AMDGPUMCTargetDesc.h
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
I
#define I(x, y, z)
Definition: MD5.cpp:59
Cloning.h
propagate
static void propagate(InstantiatedValue From, InstantiatedValue To, MatchState State, ReachabilitySet &ReachSet, std::vector< WorkListItem > &WorkList)
Definition: CFLAndersAliasAnalysis.cpp:596
llvm::GlobalValue::setLinkage
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:454
TargetPassConfig.h
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::SmallSet::begin
const_iterator begin() const
Definition: SmallSet.h:223
llvm::operator==
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:2037
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
AMDGPU.h
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:767
llvm::ms_demangle::IntrinsicFunctionKind::New
@ New
TargetSubtargetInfo.h
llvm::CloneFunction
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
Definition: CloneFunction.cpp:284
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:299
llvm::ValueMap< const Value *, WeakTrackingVH >
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::createAMDGPUPropagateAttributesEarlyPass
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:401
llvm::AMDGPUPropagateAttributesLatePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: AMDGPUPropagateAttributes.cpp:422
llvm::SmallSet::end
const_iterator end() const
Definition: SmallSet.h:229
late
amdgpu propagate attributes late
Definition: AMDGPUPropagateAttributes.cpp:198
llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:218
llvm::SmallSet::empty
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1164
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:377
llvm::GlobalValue::setVisibility
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:235
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1167
AMDGPUBaseInfo.h
SmallSet.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
INITIALIZE_PASS
TargetPassConfig.
Definition: TargetPassConfig.cpp:311