LLVM  14.0.0git
AMDGPUPropagateAttributes.cpp
Go to the documentation of this file.
1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This pass propagates attributes from kernels to the non-entry
11 /// functions. Most of the library functions were not compiled for specific ABI,
12 /// yet will be correctly compiled if proper attributes are propagated from the
13 /// caller.
14 ///
15 /// The pass analyzes the call graph and propagates ABI target features
16 /// through it.
17 ///
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is cloned all call sites will
21 /// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
31 #include "Utils/AMDGPUBaseInfo.h"
32 #include "llvm/ADT/SmallSet.h"
35 #include "llvm/IR/InstrTypes.h"
38 
39 #define DEBUG_TYPE "amdgpu-propagate-attributes"
40 
41 using namespace llvm;
42 
// Subtarget feature key/value table. It is only declared here (the definition
// lives outside this file); getFeatureString() walks it to translate feature
// bits into "+name"/"-name" entries.
43 namespace llvm {
44 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
45 }
46 
47 namespace {
48 
49 // Target features to propagate.
// These are the only feature bits compared by FnProperties::operator== and
// the only bits cleared from the callee in FnProperties::adjustToCaller().
// getFeatureString() also emits them with a '-' prefix when they are unset.
50 static constexpr const FeatureBitset TargetFeatures = {
51  AMDGPU::FeatureWavefrontSize16,
52  AMDGPU::FeatureWavefrontSize32,
53  AMDGPU::FeatureWavefrontSize64
54 };
55 
56 // Attributes to propagate.
57 // TODO: Support conservative min/max merging instead of cloning.
// String function attributes that are copied verbatim from caller to callee;
// the index into this array is also the index into per-function attribute
// storage.
58 static constexpr const char* AttributeNames[] = {
59  "amdgpu-waves-per-eu",
60  "amdgpu-flat-work-group-size"
61 };
62 
// Number of entries in AttributeNames.
63 static constexpr unsigned NumAttr =
64  sizeof(AttributeNames) / sizeof(AttributeNames[0]);
65 
// Implementation shared by the early (function) and late (module) passes.
// Starting from a set of root functions it pushes the roots' target features
// and selected string attributes down to directly called functions, either by
// rewriting the callee in place or, when AllowClone is set, by cloning it.
66 class AMDGPUPropagateAttributes {
67 
  // Snapshot of the properties this pass cares about for one function:
  // its subtarget feature bits and the attributes named in AttributeNames.
68  class FnProperties {
69  private:
  // Builds properties from raw feature bits only; used by adjustToCaller().
  // NOTE(review): the parameter is a const rvalue reference, so FB is copied
  // (not moved) into Features.
70  explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
71 
72  public:
  // Reads the feature bits of \p F's subtarget from \p TM and records each
  // attribute from AttributeNames that \p F carries.
73  explicit FnProperties(const TargetMachine &TM, const Function &F) {
74  Features = TM.getSubtargetImpl(F)->getFeatureBits();
75 
76  for (unsigned I = 0; I < NumAttr; ++I)
77  if (F.hasFnAttribute(AttributeNames[I]))
78  Attributes[I] = F.getFnAttribute(AttributeNames[I]);
79  }
80 
  // Two property sets are equal when they agree on the TargetFeatures bits
  // (all other feature bits are ignored) and on every tracked attribute.
81  bool operator == (const FnProperties &Other) const {
82  if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
83  return false;
84  for (unsigned I = 0; I < NumAttr; ++I)
85  if (Attributes[I] != Other.Attributes[I])
86  return false;
87  return true;
88  }
89 
  // Returns new properties for a callee called from \p CallerProps: this
  // function's feature bits with the TargetFeatures bits cleared, OR-ed with
  // the caller's feature bits, plus a copy of the caller's tracked attributes.
90  FnProperties adjustToCaller(const FnProperties &CallerProps) const {
91  FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
92  for (unsigned I = 0; I < NumAttr; ++I)
93  New.Attributes[I] = CallerProps.Attributes[I];
94  return New;
95  }
96 
97  FeatureBitset Features;
  // NOTE(review): the declaration of the Attributes member used above is not
  // visible in this listing (embedded line 98 is missing).
99  };
100 
  // Records that NewF is the clone of OrigF created for a caller with the
  // given Properties; findFunction() searches these records.
101  class Clone {
102  public:
103  Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
104  Properties(Props), OrigF(OrigF), NewF(NewF) {}
105 
106  FnProperties Properties;
107  Function *OrigF;
108  Function *NewF;
109  };
110 
  // Target machine used to query per-function subtarget feature bits.
111  const TargetMachine *TM;
112 
113  // Clone functions as needed or just set attributes.
114  bool AllowClone;
115 
116  // Option propagation roots.
  // NOTE(review): the declaration of Roots is not visible in this listing
  // (embedded line 117 is missing).
118 
119  // Clones of functions with their attributes.
120  SmallVector<Clone, 32> Clones;
121 
122  // Find a clone with required features.
  // Returns nullptr when no clone of \p OrigF with \p PropsNeeded is recorded.
123  Function *findFunction(const FnProperties &PropsNeeded,
124  Function *OrigF);
125 
126  // Clone function \p F and set \p NewProps on the clone.
127  // Clone takes the name of the original function.
128  Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
129 
130  // Set new function's features in place.
131  void setFeatures(Function &F, const FeatureBitset &NewFeatures);
132 
133  // Set new function's attributes in place.
134  void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
135 
  // Render \p Features as a comma-separated "target-features" string.
136  std::string getFeatureString(const FeatureBitset &Features) const;
137 
138  // Propagate attributes from Roots.
139  bool process();
140 
141 public:
142  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
143  TM(TM), AllowClone(AllowClone) {}
144 
145  // Use F as a root and propagate its attributes.
146  bool process(Function &F);
147 
148  // Propagate attributes starting from kernel functions.
149  bool process(Module &M);
150 };
151 
152 // Propagates attributes early. No cloning is allowed because this must be a
153 // function pass in order to run before any optimizations.
154 // TODO: We should only need one instance of the module pass, but that needs
155 // to be in the linker pipeline, which is currently not possible.
156 class AMDGPUPropagateAttributesEarly : public FunctionPass {
  // May be null when the pass is default-constructed.
157  const TargetMachine *TM;
158 
159 public:
160  static char ID; // Pass identification
161 
162  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
163  FunctionPass(ID), TM(TM) {
  // NOTE(review): embedded lines 164-165 of the constructor body are missing
  // from this listing.
166  }
167 
168  bool runOnFunction(Function &F) override;
169 };
170 
171 // Propagates attributes with cloning allowed, but does so late in the
172 // pipeline.
173 class AMDGPUPropagateAttributesLate : public ModulePass {
  // May be null when the pass is default-constructed; runOnModule() then
  // obtains it from TargetPassConfig if that analysis is available.
174  const TargetMachine *TM;
175 
176 public:
177  static char ID; // Pass identification
178 
179  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
180  ModulePass(ID), TM(TM) {
  // NOTE(review): embedded lines 181-182 of the constructor body are missing
  // from this listing.
183  }
184 
185  bool runOnModule(Module &M) override;
186 };
187 
188 } // end anonymous namespace.
189 
192 
193 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
194  "amdgpu-propagate-attributes-early",
195  "Early propagate attributes from kernels to functions",
196  false, false)
197 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
199  "Late propagate attributes from kernels to functions",
201 
202 Function *
203 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
204  Function *OrigF) {
205  // TODO: search for clone's clones.
206  for (Clone &C : Clones)
207  if (C.OrigF == OrigF && PropsNeeded == C.Properties)
208  return C.NewF;
209 
210  return nullptr;
211 }
212 
213 bool AMDGPUPropagateAttributes::process(Module &M) {
214  for (auto &F : M.functions())
215  if (AMDGPU::isKernel(F.getCallingConv()))
216  Roots.insert(&F);
217 
218  return Roots.empty() ? false : process();
219 }
220 
221 bool AMDGPUPropagateAttributes::process(Function &F) {
222  Roots.insert(&F);
223  return process();
224 }
225 
// Propagates the properties of every function in Roots to the functions it
// calls directly, repeating until an iteration discovers no new roots.
// A callee whose properties differ from its caller's is either rewritten in
// place (AllowClone == false) or replaced at that call site by a clone with
// adjusted properties (AllowClone == true).
// Returns true if any function or call site was changed. Roots and Clones are
// emptied before returning. Roots must be non-empty on entry.
226 bool AMDGPUPropagateAttributes::process() {
227  bool Changed = false;
  // Functions discovered during the current sweep; merged into Roots at the
  // start of the next one.
228  SmallSet<Function *, 32> NewRoots;
  // Original functions that had at least one call site redirected to a clone.
229  SmallSet<Function *, 32> Replaced;
230 
231  assert(!Roots.empty());
  // The module is taken from the first root in the set.
232  Module &M = *(*Roots.begin())->getParent();
233 
234  do {
235  Roots.insert(NewRoots.begin(), NewRoots.end());
236  NewRoots.clear();
237 
238  for (auto &F : M.functions()) {
  // Only functions with a body are considered as callees.
239  if (F.isDeclaration())
240  continue;
241 
242  const FnProperties CalleeProps(*TM, F);
  // NOTE(review): ToReplace is used below but its declaration is not visible
  // in this listing (embedded line 243 is missing).
  // Call sites of F already handled in this sweep.
244  SmallSet<CallBase *, 32> Visited;
245 
246  for (User *U : F.users()) {
247  Instruction *I = dyn_cast<Instruction>(U);
248  if (!I)
249  continue;
250  CallBase *CI = dyn_cast<CallBase>(I);
251  // Only propagate attributes if F is the called function. Specifically,
252  // do not propagate attributes if F is passed as an argument.
253  // FIXME: handle bitcasted callee, e.g.
254  // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
255  if (!CI || CI->getCalledOperand() != &F)
256  continue;
257  Function *Caller = CI->getCaller();
258  if (!Caller || !Visited.insert(CI).second)
259  continue;
  // Only calls made from a known root (old or newly found) are processed.
260  if (!Roots.count(Caller) && !NewRoots.count(Caller))
261  continue;
262 
263  const FnProperties CallerProps(*TM, *Caller);
264 
  // Callee already matches the caller: nothing to rewrite, but it becomes a
  // root itself so that its own callees are visited.
265  if (CalleeProps == CallerProps) {
266  if (!Roots.count(&F))
267  NewRoots.insert(&F);
268  continue;
269  }
270 
  // Reuse an existing clone of F made for the same caller properties, if any.
271  Function *NewF = findFunction(CallerProps, &F);
272  if (!NewF) {
273  const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
274  if (!AllowClone) {
275  // This may set different features on different iterations if
276  // there is a contradiction in callers' attributes. In this case
277  // we rely on a second pass running on Module, which is allowed
278  // to clone.
279  setFeatures(F, NewProps.Features);
280  setAttributes(F, NewProps.Attributes);
281  NewRoots.insert(&F);
282  Changed = true;
  // F was modified in place; stop scanning its remaining users in this sweep.
283  break;
284  }
285 
286  NewF = cloneWithProperties(F, NewProps);
287  Clones.push_back(Clone(CallerProps, &F, NewF));
288  NewRoots.insert(NewF);
289  }
290 
  // Defer redirecting the call until the user loop over F has finished.
291  ToReplace.push_back(std::make_pair(CI, NewF));
292  Replaced.insert(&F);
293 
294  Changed = true;
295  }
296 
  // Apply the deferred call-site redirections collected for F.
297  while (!ToReplace.empty()) {
298  auto R = ToReplace.pop_back_val();
299  R.first->setCalledFunction(R.second);
300  }
301  }
302  } while (!NewRoots.empty());
303 
  // Delete originals that no longer have any uses after redirection.
304  for (Function *F : Replaced) {
305  if (F->use_empty())
306  F->eraseFromParent();
307  }
308 
309  Roots.clear();
310  Clones.clear();
311 
312  return Changed;
313 }
314 
// Clones \p F, applies the features and tracked attributes from \p NewProps
// to the clone and returns it. When \p F is named and has local linkage the
// two functions swap names, so the clone ends up with the original name.
315 Function *
316 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
317  const FnProperties &NewProps) {
318  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
319 
  // The value map is required by CloneFunction but is not used afterwards.
320  ValueToValueMapTy dummy;
321  Function *NewF = CloneFunction(&F, dummy);
322  setFeatures(*NewF, NewProps.Features);
323  setAttributes(*NewF, NewProps.Attributes);
  // NOTE(review): embedded lines 324-325 are missing from this listing.
326 
327  // Swap names. If that is the only clone it will retain the name of now
328  // dead value. Preserve original name for externally visible functions.
329  if (F.hasName() && F.hasLocalLinkage()) {
  // Copy the clone's name first: takeName() below overwrites it.
330  std::string NewName = std::string(NewF->getName());
331  NewF->takeName(&F);
332  F.setName(NewName);
333  }
334 
335  return NewF;
336 }
337 
338 void AMDGPUPropagateAttributes::setFeatures(Function &F,
339  const FeatureBitset &NewFeatures) {
340  std::string NewFeatureStr = getFeatureString(NewFeatures);
341 
342  LLVM_DEBUG(dbgs() << "Set features "
343  << getFeatureString(NewFeatures & TargetFeatures)
344  << " on " << F.getName() << '\n');
345 
346  F.removeFnAttr("target-features");
347  F.addFnAttr("target-features", NewFeatureStr);
348 }
349 
350 void AMDGPUPropagateAttributes::setAttributes(Function &F,
351  const ArrayRef<Optional<Attribute>> NewAttrs) {
352  LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
353  for (unsigned I = 0; I < NumAttr; ++I) {
354  F.removeFnAttr(AttributeNames[I]);
355  if (NewAttrs[I]) {
356  LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
357  F.addFnAttr(*NewAttrs[I]);
358  }
359  }
360 }
361 
362 std::string
363 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
364 {
365  std::string Ret;
366  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
367  if (Features[KV.Value])
368  Ret += (StringRef("+") + KV.Key + ",").str();
369  else if (TargetFeatures[KV.Value])
370  Ret += (StringRef("-") + KV.Key + ",").str();
371  }
372  Ret.pop_back(); // Remove last comma.
373  return Ret;
374 }
375 
377  if (!TM) {
378  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
379  if (!TPC)
380  return false;
381 
382  TM = &TPC->getTM<TargetMachine>();
383  }
384 
385  if (!AMDGPU::isKernel(F.getCallingConv()))
386  return false;
387 
388  return AMDGPUPropagateAttributes(TM, false).process(F);
389 }
390 
391 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
392  if (!TM) {
393  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
394  if (!TPC)
395  return false;
396 
397  TM = &TPC->getTM<TargetMachine>();
398  }
399 
400  return AMDGPUPropagateAttributes(TM, true).process(M);
401 }
402 
405  return new AMDGPUPropagateAttributesEarly(TM);
406 }
407 
410  return new AMDGPUPropagateAttributesLate(TM);
411 }
412 
416  if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
417  return PreservedAnalyses::all();
418 
419  return AMDGPUPropagateAttributes(&TM, false).process(F)
422 }
423 
426  return AMDGPUPropagateAttributes(&TM, true).process(M)
429 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
functions
amdgpu propagate attributes Late propagate attributes from kernels to functions
Definition: AMDGPUPropagateAttributes.cpp:199
llvm::AMDGPUPropagateAttributesEarlyPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPUPropagateAttributes.cpp:414
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::Function::empty
bool empty() const
Definition: Function.h:739
llvm::Function
Definition: Function.h:61
llvm::initializeAMDGPUPropagateAttributesLatePass
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
to
Should compile to
Definition: README.txt:449
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::AMDGPUFeatureKV
const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]
llvm::Optional
Definition: APInt.h:33
llvm::GlobalValue::DefaultVisibility
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:63
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
attributes
Deduce and propagate attributes
Definition: Attributor.cpp:3074
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::initializeAMDGPUPropagateAttributesEarlyPass
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:198
llvm::AMDGPU::isKernel
LLVM_READNONE bool isKernel(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.h:723
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
InstrTypes.h
false
Definition: StackSlotColoring.cpp:142
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:45
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, "amdgpu-propagate-attributes-early", "Early propagate attributes from kernels to functions", false, false) INITIALIZE_PASS(AMDGPUPropagateAttributesLate
llvm::createAMDGPUPropagateAttributesLatePass
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:409
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::SubtargetFeatureKV
Used to provide key value pairs for feature and CPU bit flags.
Definition: MCSubtargetInfo.h:34
llvm::CallBase::getCaller
Function * getCaller()
Helper to get the caller (the parent function).
Definition: Instructions.cpp:282
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1382
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
AMDGPUMCTargetDesc.h
false
amdgpu propagate attributes Late propagate attributes from kernels to false
Definition: AMDGPUPropagateAttributes.cpp:200
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
I
#define I(x, y, z)
Definition: MD5.cpp:59
Cloning.h
Attributes
AMDGPU Kernel Attributes
Definition: AMDGPULowerKernelAttributes.cpp:254
propagate
static void propagate(InstantiatedValue From, InstantiatedValue To, MatchState State, ReachabilitySet &ReachSet, std::vector< WorkListItem > &WorkList)
Definition: CFLAndersAliasAnalysis.cpp:596
llvm::GlobalValue::setLinkage
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:454
TargetPassConfig.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::SmallSet::begin
const_iterator begin() const
Definition: SmallSet.h:223
llvm::operator==
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1974
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:818
llvm::ms_demangle::IntrinsicFunctionKind::New
@ New
TargetSubtargetInfo.h
llvm::CloneFunction
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
Definition: CloneFunction.cpp:283
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::ValueMap< const Value *, WeakTrackingVH >
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::createAMDGPUPropagateAttributesEarlyPass
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:404
llvm::AMDGPUPropagateAttributesLatePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: AMDGPUPropagateAttributes.cpp:425
llvm::SmallSet::end
const_iterator end() const
Definition: SmallSet.h:229
llvm::CallBase::getCalledOperand
Value * getCalledOperand() const
Definition: InstrTypes.h:1386
late
amdgpu propagate attributes late
Definition: AMDGPUPropagateAttributes.cpp:198
llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:218
llvm::SmallSet::empty
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1161
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:382
llvm::GlobalValue::setVisibility
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:235
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1172
AMDGPUBaseInfo.h
SmallSet.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
INITIALIZE_PASS
TargetPassConfig.
Definition: TargetPassConfig.cpp:359