LLVM 22.0.0git
AMDGPUUniformIntrinsicCombine.cpp
Go to the documentation of this file.
1//===-- AMDGPUUniformIntrinsicCombine.cpp ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass simplifies certain intrinsic calls when the arguments are uniform.
11/// It's true that this pass has transforms that can lead to a situation where
12/// some instruction whose operand was previously recognized as statically
13/// uniform is later on no longer recognized as statically uniform. However, the
14/// semantics of how programs execute don't (and must not, for this precise
15/// reason) care about static uniformity, they only ever care about dynamic
16/// uniformity. And every instruction that's downstream and cares about dynamic
17/// uniformity must be convergent (and isel will introduce v_readfirstlane for
18/// them if their operands can't be proven statically uniform).
19///
20/// This pass is implemented as a ModulePass because intrinsic declarations
21/// exist at the module scope, allowing us to skip processing entirely if no
22/// declarations are present and to traverse their user lists directly when
23/// they are. A FunctionPass would instead require scanning every instruction
24/// in every function to find relevant intrinsics, which is far less efficient.
25//===----------------------------------------------------------------------===//
26
27#include "AMDGPU.h"
28#include "GCNSubtarget.h"
35#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/InstVisitor.h"
38#include "llvm/IR/IntrinsicsAMDGPU.h"
43
44#define DEBUG_TYPE "amdgpu-uniform-intrinsic-combine"
45
46using namespace llvm;
47using namespace llvm::AMDGPU;
48using namespace llvm::PatternMatch;
49
50/// Wrapper for querying uniformity info that first checks locally tracked
51/// instructions.
52static bool
54 const ValueMap<const Value *, bool> &Tracker) {
55 Value *V = U.get();
56 if (auto It = Tracker.find(V); It != Tracker.end())
57 return !It->second; // divergent if marked false
58 return UI.isDivergentUse(U);
59}
60
61/// Optimizes uniform intrinsics calls if their operand can be proven uniform.
63 const UniformityInfo &UI,
65 llvm::Intrinsic::ID IID = II.getIntrinsicID();
66
67 switch (IID) {
68 case Intrinsic::amdgcn_permlane64:
69 case Intrinsic::amdgcn_readfirstlane:
70 case Intrinsic::amdgcn_readlane: {
71 Value *Src = II.getArgOperand(0);
72 if (isDivergentUseWithNew(II.getOperandUse(0), UI, Tracker))
73 return false;
74 LLVM_DEBUG(dbgs() << "Replacing " << II << " with " << *Src << '\n');
75 II.replaceAllUsesWith(Src);
76 II.eraseFromParent();
77 return true;
78 }
79 case Intrinsic::amdgcn_ballot: {
80 Value *Src = II.getArgOperand(0);
81 if (isDivergentUseWithNew(II.getOperandUse(0), UI, Tracker))
82 return false;
83 LLVM_DEBUG(dbgs() << "Found uniform ballot intrinsic: " << II << '\n');
84
85 bool Changed = false;
86 for (User *U : make_early_inc_range(II.users())) {
87 if (auto *ICmp = dyn_cast<ICmpInst>(U)) {
88 Value *Op0 = ICmp->getOperand(0);
89 Value *Op1 = ICmp->getOperand(1);
90 ICmpInst::Predicate Pred = ICmp->getPredicate();
91 Value *OtherOp = Op0 == &II ? Op1 : Op0;
92
93 if (Pred == ICmpInst::ICMP_EQ && match(OtherOp, m_Zero())) {
94 // Case: (icmp eq %ballot, 0) -> xor %ballot_arg, 1
95 Instruction *NotOp =
96 BinaryOperator::CreateNot(Src, "", ICmp->getIterator());
97 Tracker[NotOp] = true; // NOT preserves uniformity
98 LLVM_DEBUG(dbgs() << "Replacing ICMP_EQ: " << *NotOp << '\n');
99 ICmp->replaceAllUsesWith(NotOp);
100 ICmp->eraseFromParent();
101 Changed = true;
102 } else if (Pred == ICmpInst::ICMP_NE && match(OtherOp, m_Zero())) {
103 // Case: (icmp ne %ballot, 0) -> %ballot_arg
104 LLVM_DEBUG(dbgs() << "Replacing ICMP_NE with ballot argument: "
105 << *Src << '\n');
106 ICmp->replaceAllUsesWith(Src);
107 ICmp->eraseFromParent();
108 Changed = true;
109 }
110 }
111 }
112 // Erase the intrinsic if it has no remaining uses.
113 if (II.use_empty())
114 II.eraseFromParent();
115 return Changed;
116 }
117 default:
118 llvm_unreachable("Unexpected intrinsic ID in optimizeUniformIntrinsic");
119 }
120 return false;
121}
122
123/// Iterates over intrinsic declarations in the module to optimize their uses.
125 bool IsChanged = false;
127
130 for (Function &F : M) {
131 switch (F.getIntrinsicID()) {
132 case Intrinsic::amdgcn_permlane64:
133 case Intrinsic::amdgcn_readfirstlane:
134 case Intrinsic::amdgcn_readlane:
135 case Intrinsic::amdgcn_ballot:
136 break;
137 default:
138 continue;
139 }
140
141 for (User *U : make_early_inc_range(F.users())) {
142 auto *II = cast<IntrinsicInst>(U);
143 Function *ParentF = II->getFunction();
144 const auto &UI = FAM.getResult<UniformityInfoAnalysis>(*ParentF);
145 IsChanged |= optimizeUniformIntrinsic(*II, UI, Tracker);
146 }
147 }
148 return IsChanged;
149}
150
static bool runUniformIntrinsicCombine(Module &M, ModuleAnalysisManager &AM)
Iterates over intrinsic declarations in the module to optimize their uses.
static bool optimizeUniformIntrinsic(IntrinsicInst &II, const UniformityInfo &UI, ValueMap< const Value *, bool > &Tracker)
Optimizes uniform intrinsics calls if their operand can be proven uniform.
static bool isDivergentUseWithNew(const Use &U, const UniformityInfo &UI, const ValueMap< const Value *, bool > &Tracker)
Wrapper for querying uniformity info that first checks locally tracked instructions.
AMD GCN specific subclass of TargetSubtarget.
#define F(x, y, z)
Definition MD5.cpp:55
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define LLVM_DEBUG(...)
Definition Debug.h:114
Target-Independent Code Generator Pass Configuration Options pass.
LLVM IR instance of the generic uniformity analysis.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_NE
not equal
Definition InstrTypes.h:698
bool isDivergentUse(const UseT &U) const
Whether U is divergent.
A wrapper class for inspecting calls to intrinsic functions.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
Analysis pass which computes UniformityInfo.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
See the file comment.
Definition ValueMap.h:84
iterator find(const KeyT &Val)
Definition ValueMap.h:160
iterator end()
Definition ValueMap.h:139
LLVM Value Representation.
Definition Value.h:75
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool match(Val *V, const Pattern &P)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< SSAContext > UniformityInfo
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)