Go to the documentation of this file.
39 #define DEBUG_TYPE "amdgpu-propagate-attributes"
51 AMDGPU::FeatureWavefrontSize16,
52 AMDGPU::FeatureWavefrontSize32,
53 AMDGPU::FeatureWavefrontSize64
58 static constexpr
const char *AttributeNames[] = {
"amdgpu-waves-per-eu"};
60 static constexpr
unsigned NumAttr =
61 sizeof(AttributeNames) /
sizeof(AttributeNames[0]);
63 class AMDGPUPropagateAttributes {
67 explicit FnProperties(
const FeatureBitset &&FB) : Features(FB) {}
71 Features =
TM.getSubtargetImpl(
F)->getFeatureBits();
73 for (
unsigned I = 0;
I < NumAttr; ++
I)
74 if (
F.hasFnAttribute(AttributeNames[
I]))
78 bool operator == (
const FnProperties &Other)
const {
79 if ((Features & TargetFeatures) != (
Other.Features & TargetFeatures))
81 for (
unsigned I = 0;
I < NumAttr; ++
I)
87 FnProperties adjustToCaller(
const FnProperties &CallerProps)
const {
88 FnProperties
New((Features & ~TargetFeatures) | CallerProps.Features);
89 for (
unsigned I = 0;
I < NumAttr; ++
I)
90 New.Attributes[
I] = CallerProps.Attributes[
I];
101 Properties(Props), OrigF(OrigF), NewF(NewF) {}
103 FnProperties Properties;
120 Function *findFunction(
const FnProperties &PropsNeeded,
133 std::string getFeatureString(
const FeatureBitset &Features)
const;
140 TM(
TM), AllowClone(AllowClone) {}
153 class AMDGPUPropagateAttributesEarly :
public FunctionPass {
170 class AMDGPUPropagateAttributesLate :
public ModulePass {
182 bool runOnModule(
Module &M)
override;
191 "amdgpu-propagate-attributes-early",
192 "Early propagate attributes from kernels to functions",
200 AMDGPUPropagateAttributes::findFunction(
const FnProperties &PropsNeeded,
203 for (Clone &
C : Clones)
204 if (
C.OrigF == OrigF && PropsNeeded ==
C.Properties)
210 bool AMDGPUPropagateAttributes::process(
Module &M) {
211 for (
auto &
F :
M.functions())
218 bool AMDGPUPropagateAttributes::process(
Function &
F) {
223 bool AMDGPUPropagateAttributes::process() {
224 bool Changed =
false;
232 Roots.insert(NewRoots.
begin(), NewRoots.
end());
235 for (
auto &
F :
M.functions()) {
236 if (
F.isDeclaration())
239 const FnProperties CalleeProps(*
TM,
F);
243 for (
User *U :
F.users()) {
255 if (!Caller || !Visited.
insert(CI).second)
257 if (!Roots.count(Caller) && !NewRoots.
count(Caller))
260 const FnProperties CallerProps(*
TM, *Caller);
262 if (CalleeProps == CallerProps) {
263 if (!Roots.count(&
F))
268 Function *NewF = findFunction(CallerProps, &
F);
270 const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
276 setFeatures(
F, NewProps.Features);
277 setAttributes(
F, NewProps.Attributes);
283 NewF = cloneWithProperties(
F, NewProps);
284 Clones.push_back(Clone(CallerProps, &
F, NewF));
288 ToReplace.push_back(std::make_pair(CI, NewF));
294 while (!ToReplace.empty()) {
296 R.first->setCalledFunction(
R.second);
299 }
while (!NewRoots.
empty());
303 F->eraseFromParent();
313 AMDGPUPropagateAttributes::cloneWithProperties(
Function &
F,
314 const FnProperties &NewProps) {
319 setFeatures(*NewF, NewProps.Features);
320 setAttributes(*NewF, NewProps.Attributes);
326 if (
F.hasName() &&
F.hasLocalLinkage()) {
327 std::string NewName = std::string(NewF->
getName());
335 void AMDGPUPropagateAttributes::setFeatures(
Function &
F,
337 std::string NewFeatureStr = getFeatureString(NewFeatures);
340 << getFeatureString(NewFeatures & TargetFeatures)
341 <<
" on " <<
F.getName() <<
'\n');
343 F.removeFnAttr(
"target-features");
344 F.addFnAttr(
"target-features", NewFeatureStr);
347 void AMDGPUPropagateAttributes::setAttributes(
Function &
F,
350 for (
unsigned I = 0;
I < NumAttr; ++
I) {
351 F.removeFnAttr(AttributeNames[
I]);
354 F.addFnAttr(*NewAttrs[
I]);
360 AMDGPUPropagateAttributes::getFeatureString(
const FeatureBitset &Features)
const
364 if (Features[KV.Value])
366 else if (TargetFeatures[KV.Value])
375 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
385 return AMDGPUPropagateAttributes(
TM,
false).process(
F);
388 bool AMDGPUPropagateAttributesLate::runOnModule(
Module &M) {
390 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
397 return AMDGPUPropagateAttributes(
TM,
true).process(M);
402 return new AMDGPUPropagateAttributesEarly(
TM);
407 return new AMDGPUPropagateAttributesLate(
TM);
416 return AMDGPUPropagateAttributes(&
TM,
false).process(
F)
423 return AMDGPUPropagateAttributes(&
TM,
true).process(
M)
A set of analyses that are preserved following a run of a transformation pass.
amdgpu propagate attributes Late propagate attributes from kernels to functions
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This is an optimization pass for GlobalISel generic memory operations.
We currently emit an imull whose result lands in eax. Perhaps this is what we really should generate? Is imull three or four cycles? (The alternative is a movl into eax followed by a leal into eax.) The current instruction priority is based on pattern complexity. The former is more complex because it folds a load, so the latter will not be emitted. Perhaps we should use AddedComplexity to give LEA32r a higher priority? We should always try to match LEA first, since the LEA matching code does some estimate to determine whether the match is profitable. If we care more about code size, then imull is better: it's two bytes shorter than movl + leal. On a Pentium M, ...
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]
@ DefaultVisibility
The GV is visible.
Container class for subtarget features.
Deduce and propagate attributes
LLVM_NODISCARD T pop_back_val()
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
LLVM_READNONE bool isKernel(CallingConv::ID CC)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
(vector float) vec_cmpeq(*A, *B) C
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, "amdgpu-propagate-attributes-early", "Early propagate attributes from kernels to functions", false, false) INITIALIZE_PASS(AMDGPUPropagateAttributesLate
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
@ InternalLinkage
Rename collisions when linking (static functions).
Used to provide key value pairs for feature and CPU bit flags.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Function * getCaller()
Helper to get the caller (the parent function).
bool isEntryFunctionCC(CallingConv::ID CC)
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
amdgpu propagate attributes Late propagate attributes from kernels to false
static void propagate(InstantiatedValue From, InstantiatedValue To, MatchState State, ReachabilitySet &ReachSet, std::vector< WorkListItem > &WorkList)
void setLinkage(LinkageTypes LT)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Primary interface to the complete machine description for the target machine.
const_iterator begin() const
bool operator==(uint64_t V1, const APInt &V2)
A Module instance is used to store all the information related to an LLVM module.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
static const Function * getParent(const Value *V)
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
StringRef getName() const
Return a constant reference to the value's name.
static bool runOnFunction(Function &F, bool PostInlining)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
const_iterator end() const
Value * getCalledOperand() const
amdgpu propagate attributes late
LLVM_NODISCARD bool empty() const
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
A container for analyses that lazily runs them and caches their results.
const char LLVMTargetMachineRef TM
FunctionPass class - This class is used to implement most global optimizations.
void takeName(Value *V)
Transfer the name from V to this value.
void setVisibility(VisibilityTypes V)
Optional< std::vector< StOtherPiece > > Other