LLVM 18.0.0git
AMDGPURewriteUndefForPHI.cpp
Go to the documentation of this file.
1//===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file implements the idea of rewriting undef incoming operands for
9// certain PHIs in a structurized CFG. This pass only works on IR that has gone
10// through the StructurizedCFG pass, and it has an additional limitation that
11// means it can only run after SIAnnotateControlFlow.
12//
13// To achieve optimal code generation for AMDGPU, we assume that uniformity
14// analysis reports a PHI in the join block of a divergent branch as uniform if
15// it has one unique uniform value plus additional undefined/poisoned incoming
16// values. That is to say, the later compiler pipeline must ensure such a PHI
17// always returns a uniform value and works correctly. Let's take a look at two
18// typical patterns in a structured CFG that need to be taken care of. (In both
19// patterns, block %if terminates with a divergent branch.)
20//
21// Pattern A: Block with undefined incoming value dominates defined predecessor
22// %if
23// | \
24// | %then
25// | /
26// %endif: %phi = phi [%undef, %if], [%uniform, %then]
27//
28// Pattern B: Block with defined incoming value dominates undefined predecessor
29// %if
30// | \
31// | %then
32// | /
33// %endif: %phi = phi [%uniform, %if], [%undef, %then]
34//
35// For pattern A, by reporting %phi as uniform, the later pipeline needs to make
36// sure it is handled correctly. The backend usually allocates a scalar register
37// and, if any thread in a wave takes the %then path, the scalar register will
38// get the %uniform value.
39//
40// For pattern B, we will replace the undef operand with the other defined value
41// in this pass. So the scalar register allocated for such PHI will get correct
42// liveness. Without this transformation, the scalar register may be overwritten
43// in the %then block.
44//
45// Limitation note:
46// If the join block of divergent threads is a loop header, the pass cannot
47// handle it correctly right now. In the case below, the undef in %phi should
48// also be rewritten. Currently we depend on SIAnnotateControlFlow to split the
49// %header block into a separate join block so that the undef can be rewritten.
50// %if
51// | \
52// | %then
53// | /
54// -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
55// | |
56// \---
57
58#include "AMDGPU.h"
60#include "llvm/IR/BasicBlock.h"
61#include "llvm/IR/Constants.h"
62#include "llvm/IR/Dominators.h"
65
66using namespace llvm;
67
68#define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"
69
70namespace {
71
// Legacy pass-manager FunctionPass wrapper for the undef-rewriting
// transformation. The per-function work lives in runOnFunction(), which is
// defined out of line further down in this file.
// NOTE(review): this listing skips some embedded line numbers inside the class
// (76, 84-85, 87-88), so the constructor body and the first part of
// getAnalysisUsage() are not visible here. runOnFunction() queries
// UniformityInfoWrapperPass and DominatorTreeWrapperPass, so presumably the
// missing lines declare those as required analyses — confirm against upstream.
72class AMDGPURewriteUndefForPHILegacy : public FunctionPass {
73public:
// Pass identifier handed to the FunctionPass base-class constructor.
74 static char ID;
75 AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) {
77 }
78 bool runOnFunction(Function &F) override;
// Human-readable pass name.
79 StringRef getPassName() const override {
80 return "AMDGPU Rewrite Undef for PHI";
81 }
82
// Declares this pass's analysis usage; the only statement visible in this
// listing marks the CFG as preserved.
83 void getAnalysisUsage(AnalysisUsage &AU) const override {
86
89 AU.setPreservesCFG();
90 }
91};
92
93} // end anonymous namespace
// Out-of-class definition of the legacy pass's static ID member.
94char AMDGPURewriteUndefForPHILegacy::ID = 0;
95
// Legacy pass registration under the DEBUG_TYPE string.
// NOTE(review): embedded line numbers 98-99 are skipped between BEGIN and END;
// presumably they held INITIALIZE_PASS_DEPENDENCY entries — confirm against
// upstream.
96INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
97 "Rewrite undef for PHI", false, false)
100INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
101 "Rewrite undef for PHI", false, false)
102
// Body of the PHI-rewriting routine; returns true when at least one PHI was
// replaced. For every PHI that uniformity analysis does not report as
// divergent, it looks for a single defined incoming value (skipping undef
// operands and self-references). When the block supplying that value ends in a
// divergent terminator and dominates both the PHI's block and every recorded
// undef predecessor, all uses of the PHI are redirected to that value and the
// PHI is erased.
// NOTE(review): the function header and the declaration of `Undefs` are not
// present in this listing (embedded line numbers 103 and 117 are skipped).
// Presumably the header is
// `bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT)` and
// `Undefs` is a SmallVector of BasicBlock pointers — confirm against upstream.
104 bool Changed = false;
// PHIs whose uses have been replaced; they are erased after the walk.
105 SmallVector<PHINode *> ToBeDeleted;
106 for (auto &BB : F) {
107 for (auto &PHI : BB.phis()) {
// Only PHIs not reported as divergent are candidates.
108 if (UA.isDivergent(&PHI))
109 continue;
110
111 // The unique incoming value except undef/poison for the PHI node.
112 Value *UniqueDefinedIncoming = nullptr;
113 // The divergent block with defined incoming value that dominates all
114 // other blocks with the same incoming value.
115 BasicBlock *DominateBB = nullptr;
116 // Predecessors with undefined incoming value (excluding loop backedge).
118
119 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
120 Value *Incoming = PHI.getIncomingValue(i);
121 BasicBlock *IncomingBB = PHI.getIncomingBlock(i);
122
// An operand that is the PHI itself contributes no new value; skip it.
123 if (Incoming == &PHI)
124 continue;
125
126 if (isa<UndefValue>(Incoming)) {
127 // Undef from loop backedge will not be replaced.
128 if (!DT->dominates(&BB, IncomingBB))
129 Undefs.push_back(IncomingBB);
130 continue;
131 }
132
// First defined value seen: remember it together with its incoming block.
133 if (!UniqueDefinedIncoming) {
134 UniqueDefinedIncoming = Incoming;
135 DominateBB = IncomingBB;
136 } else if (Incoming == UniqueDefinedIncoming) {
137 // Update DominateBB if necessary.
138 if (DT->dominates(IncomingBB, DominateBB))
139 DominateBB = IncomingBB;
140 } else {
// A second, different defined value: there is no unique one, so clear the
// candidate and stop scanning; the check below then skips this PHI.
141 UniqueDefinedIncoming = nullptr;
142 break;
143 }
144 }
145 // We only need to replace the undef for the PHI which is merging
146 // defined/undefined values from divergent threads.
147 // TODO: We should still be able to replace undef value if the unique
148 // value is a Constant.
149 if (!UniqueDefinedIncoming || Undefs.empty() ||
150 !UA.isDivergent(DominateBB->getTerminator()))
151 continue;
152
153 // We only replace the undef when DominateBB truly dominates all the
154 // other predecessors with undefined incoming value. Make sure DominateBB
155 // dominates BB so that UniqueDefinedIncoming is available in BB and
156 // afterwards.
157 if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
158 return DT->dominates(DominateBB, UD);
159 })) {
160 PHI.replaceAllUsesWith(UniqueDefinedIncoming);
161 ToBeDeleted.push_back(&PHI);
162 Changed = true;
163 }
164 }
165 }
166
// Erasure is deferred until every block has been visited — presumably so that
// no PHI is removed while BB.phis() is still being iterated.
167 for (auto *PHI : ToBeDeleted)
168 PHI->eraseFromParent();
169
170 return Changed;
171}
172
// Legacy pass entry point: fetches the uniformity info and the dominator tree
// from their legacy wrapper passes and forwards them to rewritePHIs(). The
// result of rewritePHIs() is returned unchanged.
173bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) {
174 UniformityInfo &UA =
175 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
176 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
177 return rewritePHIs(F, UA, DT);
178}
179
// NOTE(review): the header of this function and the lines that define UA, DT
// and PA are missing from this listing (embedded line numbers 180-183 and
// 186-187 are skipped). Presumably this is the new-pass-manager run() entry
// point — confirm against upstream.
184 bool Changed = rewritePHIs(F, UA, DT);
185 if (Changed) {
// Something was rewritten: return PA, whose construction is not visible here.
188 return PA;
189 }
190
// Nothing was rewritten, so all analyses are reported as preserved.
191 return PreservedAnalyses::all();
192}
193
// Returns a newly allocated legacy pass object.
// NOTE(review): the function header is missing from this listing (embedded
// line number 194 is skipped); presumably it is the
// createAMDGPURewriteUndefForPHILegacyPass() factory — confirm against
// upstream.
195 return new AMDGPURewriteUndefForPHILegacy();
196}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
Rewrite undef for false bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT)
Rewrite undef for PHI
#define DEBUG_TYPE
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define F(x, y, z)
Definition: MD5.cpp:55
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
LLVM IR instance of the generic uniformity analysis.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Analysis pass which computes UniformityInfo.
Legacy analysis pass which computes a CycleInfo.
LLVM Value Representation.
Definition: Value.h:74
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1727
void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &)
FunctionPass * createAMDGPURewriteUndefForPHILegacyPass()