LLVM  16.0.0git
AMDGPURewriteUndefForPHI.cpp
Go to the documentation of this file.
1 //===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file implements the idea to rewrite undef incoming operand for certain
9 // PHIs in structurized CFG. This pass only works on IR that has gone through
10 // StructurizedCFG pass, and this pass has some additional limitation that make
11 // it can only run after SIAnnotateControlFlow.
12 //
13 // To achieve optimal code generation for AMDGPU, we assume that divergence
14 // analysis reports the PHI in join block of divergent branch as uniform if
15 // it has one unique uniform value plus additional undefined/poisoned incoming
16 // value. That is to say the later compiler pipeline will ensure such PHI always
17 // return uniform value and ensure it work correctly. Let's take a look at two
18 // typical patterns in structured CFG that need to be taken care: (In both
19 // patterns, block %if terminate with divergent branch.)
20 //
21 // Pattern A: Block with undefined incoming value dominates defined predecessor
22 // %if
23 // | \
24 // | %then
25 // | /
26 // %endif: %phi = phi [%undef, %if], [%uniform, %then]
27 //
28 // Pattern B: Block with defined incoming value dominates undefined predecessor
29 // %if
30 // | \
31 // | %then
32 // | /
33 // %endif: %phi = phi [%uniform, %if], [%undef, %then]
34 //
35 // For pattern A, by reporting %phi as uniform, the later pipeline need to make
36 // sure it be handled correctly. The backend usually allocates a scalar register
37 // and if any thread in a wave takes %then path, the scalar register will get
38 // the %uniform value.
39 //
40 // For pattern B, we will replace the undef operand with the other defined value
41 // in this pass. So the scalar register allocated for such PHI will get correct
42 // liveness. Without this transformation, the scalar register may be overwritten
43 // in the %then block.
44 //
45 // Limitation note:
46 // If the join block of divergent threads is a loop header, the pass cannot
47 // handle it correctly right now. For below case, the undef in %phi should also
48 // be rewritten. Currently we depend on SIAnnotateControlFlow to split %header
49 // block to get a separate join block, then we can rewrite the undef correctly.
50 // %if
51 // | \
52 // | %then
53 // | /
54 // -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
55 // | |
56 // \---
57 
58 #include "AMDGPU.h"
60 #include "llvm/IR/BasicBlock.h"
61 #include "llvm/IR/Constants.h"
62 #include "llvm/IR/Dominators.h"
63 #include "llvm/IR/Instructions.h"
64 #include "llvm/InitializePasses.h"
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"
69 
70 namespace {
71 
72 class AMDGPURewriteUndefForPHI : public FunctionPass {
73 public:
74  static char ID;
75  AMDGPURewriteUndefForPHI() : FunctionPass(ID) {
77  }
78  bool runOnFunction(Function &F) override;
79  StringRef getPassName() const override {
80  return "AMDGPU Rewrite Undef for PHI";
81  }
82 
83  void getAnalysisUsage(AnalysisUsage &AU) const override {
86 
89  AU.setPreservesCFG();
90  }
91 };
92 
93 } // end anonymous namespace
95 
96 INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
97  "Rewrite undef for PHI", false, false)
100 INITIALIZE_PASS_END(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
101  "Rewrite undef for PHI", false, false)
102 
104  bool Changed = false;
105  SmallVector<PHINode *> ToBeDeleted;
106  for (auto &BB : F) {
107  for (auto &PHI : BB.phis()) {
108  if (DA->isDivergent(&PHI))
109  continue;
110 
111  // The unique incoming value except undef/poison for the PHI node.
112  Value *UniqueDefinedIncoming = nullptr;
113  // The divergent block with defined incoming value that dominates all
114  // other block with the same incoming value.
115  BasicBlock *DominateBB = nullptr;
116  // Predecessors with undefined incoming value (excluding loop backedge).
118 
119  for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
120  Value *Incoming = PHI.getIncomingValue(i);
121  BasicBlock *IncomingBB = PHI.getIncomingBlock(i);
122 
123  if (Incoming == &PHI)
124  continue;
125 
126  if (isa<UndefValue>(Incoming)) {
127  // Undef from loop backedge will not be replaced.
128  if (!DT->dominates(&BB, IncomingBB))
129  Undefs.push_back(IncomingBB);
130  continue;
131  }
132 
133  if (!UniqueDefinedIncoming) {
134  UniqueDefinedIncoming = Incoming;
135  DominateBB = IncomingBB;
136  } else if (Incoming == UniqueDefinedIncoming) {
137  // Update DominateBB if necessary.
138  if (DT->dominates(IncomingBB, DominateBB))
139  DominateBB = IncomingBB;
140  } else {
141  UniqueDefinedIncoming = nullptr;
142  break;
143  }
144  }
145  // We only need to replace the undef for the PHI which is merging
146  // defined/undefined values from divergent threads.
147  // TODO: We should still be able to replace undef value if the unique
148  // value is a Constant.
149  if (!UniqueDefinedIncoming || Undefs.empty() ||
150  !DA->isDivergent(DominateBB->getTerminator()))
151  continue;
152 
153  // We only replace the undef when DominateBB truly dominates all the
154  // other predecessors with undefined incoming value. Make sure DominateBB
155  // dominates BB so that UniqueDefinedIncoming is available in BB and
156  // afterwards.
157  if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
158  return DT->dominates(DominateBB, UD);
159  })) {
160  PHI.replaceAllUsesWith(UniqueDefinedIncoming);
161  ToBeDeleted.push_back(&PHI);
162  Changed = true;
163  }
164  }
165  }
166 
167  for (auto *PHI : ToBeDeleted)
168  PHI->eraseFromParent();
169 
170  return Changed;
171 }
172 
174  LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
175  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
176  return rewritePHIs(F, DA, DT);
177 }
178 
180  return new AMDGPURewriteUndefForPHI();
181 }
i
i
Definition: README.txt:29
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::Function
Definition: Function.h:60
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPURewriteUndefForPHI.cpp:68
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::initializeAMDGPURewriteUndefForPHIPass
void initializeAMDGPURewriteUndefForPHIPass(PassRegistry &)
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Constants.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:141
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
BasicBlock.h
llvm::AArch64PACKey::DA
@ DA
Definition: AArch64BaseInfo.h:821
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:85
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHI, DEBUG_TYPE, "Rewrite undef for PHI", false, false) INITIALIZE_PASS_END(AMDGPURewriteUndefForPHI
Instructions.h
LegacyDivergenceAnalysis.h
Dominators.h
rewritePHIs
Rewrite undef for false bool rewritePHIs(Function &F, LegacyDivergenceAnalysis *DA, DominatorTree *DT)
Definition: AMDGPURewriteUndefForPHI.cpp:103
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::createAMDGPURewriteUndefForPHIPass
FunctionPass * createAMDGPURewriteUndefForPHIPass()
Definition: AMDGPURewriteUndefForPHI.cpp:179