//===-- AMDGPUGlobalISelDivergenceLowering.cpp ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// GlobalISel pass that selects divergent i1 phis as lane mask phis.
/// Lane mask merging uses the same algorithm as SDAG in SILowerI1Copies.
/// Handles all cases of temporal divergence.
/// For divergent non-phi i1 values and for uniform i1 uses outside of the
/// cycle, this pass currently depends on LCSSA to insert phis with a single
/// incoming value.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "SILowerI1Copies.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"

using namespace llvm;

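// A rough sketch of the rewrite (illustrative only; register and block names
// are made up, not taken from an actual test): a divergent
//
//   %dst:_(s1) = G_PHI %a(s1), %bb.0, %b(s1), %bb.1
//
// stays a phi, but all of its i1 values become lane masks: the destination and
// the incomings are constrained to the wave-wide boolean register class
// (sreg_32 for wave32, sreg_64 for wave64), and where an incoming value needs
// it, the previous mask and the current value are merged under EXEC with the
// ANDN2/AND/OR sequence built in buildMergeLaneMasks below.
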
namespace {

class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
public:
  static char ID;

public:
  AMDGPUGlobalISelDivergenceLowering() : MachineFunctionPass(ID) {
    initializeAMDGPUGlobalISelDivergenceLoweringPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU GlobalISel divergence lowering";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachinePostDominatorTreeWrapperPass>();
    AU.addRequired<MachineUniformityAnalysisPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

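// The legacy pass is registered under DEBUG_TYPE, so it can be exercised in
// isolation on MIR input with something along the lines of (illustrative
// invocation, not a specific test):
//   llc -mtriple=amdgcn -mcpu=gfx1010 \
//       -run-pass=amdgpu-global-isel-divergence-lowering -o - divergent-phi.mir
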
class DivergenceLoweringHelper : public PhiLoweringHelper {
public:
  DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
                           MachinePostDominatorTree *PDT,
                           MachineUniformityInfo *MUI);

private:
  MachineUniformityInfo *MUI = nullptr;
  MachineIRBuilder B;
  Register buildRegCopyToLaneMask(Register Reg);

public:
  void markAsLaneMask(Register DstReg) const override;
  void getCandidatesForLowering(
      SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
  void collectIncomingValuesFromPhi(
      const MachineInstr *MI,
      SmallVectorImpl<Incoming> &Incomings) const override;
  void replaceDstReg(Register NewReg, Register OldReg,
                     MachineBasicBlock *MBB) override;
  void buildMergeLaneMasks(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I, const DebugLoc &DL,
                           Register DstReg, Register PrevReg,
                           Register CurReg) override;
  void constrainAsLaneMask(Incoming &In) override;
};

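// Members used below but not declared here (MF, MRI, TII, ST, ExecReg,
// AndN2Op, AndOp, OrOp, LaneMaskRegAttrs) and the lowerPhis() driver come from
// the PhiLoweringHelper base class declared in SILowerI1Copies.h;
// createLaneMaskReg() is a helper declared in the same header.
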
DivergenceLoweringHelper::DivergenceLoweringHelper(
    MachineFunction *MF, MachineDominatorTree *DT,
    MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
    : PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}

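// For context: a lane mask holds one bit per lane of the wave and lives in a
// scalar register, so ST->getBoolRC() below is the wave-wide boolean register
// class (SReg_32 for wave32, SReg_64 for wave64) referred to by the
// "_(s1) -> SReg_32/64(s1)" comment.
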
// _(s1) -> SReg_32/64(s1)
void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
  assert(MRI->getType(DstReg) == LLT::scalar(1));

  if (MRI->getRegClassOrNull(DstReg)) {
    if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
      return;
    llvm_unreachable("Failed to constrain register class");
  }

  MRI->setRegClass(DstReg, ST->getBoolRC());
}

void DivergenceLoweringHelper::getCandidatesForLowering(
    SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
  LLT S1 = LLT::scalar(1);

  // Add divergent i1 phis to the list.
  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB.phis()) {
      Register Dst = MI.getOperand(0).getReg();
      if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
        Vreg1Phis.push_back(&MI);
    }
  }
}

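// PHI / G_PHI operands are laid out as the defined register at operand 0
// followed by (incoming register, predecessor block) pairs, hence the loop
// below starting at operand 1 and stepping by 2.
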
void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
    const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
  for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
    Incomings.emplace_back(MI->getOperand(i).getReg(),
                           MI->getOperand(i + 1).getMBB(), Register());
  }
}

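// The lowered lane mask ends up in NewReg while existing users still read
// OldReg, so (as far as the shared lowering driver is concerned) stitching the
// old register back in is just a COPY defining OldReg from NewReg at the first
// non-phi position of the block.
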
void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
                                             MachineBasicBlock *MBB) {
  BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
      .addReg(NewReg);
}

// Copy Reg to a new lane mask register; insert the copy after the instruction
// that defines Reg, skipping phis if needed.
Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
  Register LaneMask = createLaneMaskReg(MRI, LaneMaskRegAttrs);
  MachineInstr *Instr = MRI->getVRegDef(Reg);
  MachineBasicBlock *MBB = Instr->getParent();
  B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
  B.buildCopy(LaneMask, Reg);
  return LaneMask;
}

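// The copy built above gives the incoming value a register created with the
// lane-mask attributes (LaneMaskRegAttrs), whatever the original value was
// defined as, so the merge in buildMergeLaneMasks operates only on lane-mask
// registers.
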
// bb.previous
//   %PrevReg = ...
//
// bb.current
//   %CurReg = ...
//
//   %DstReg - not defined
//
// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
//
// bb.previous
//   %PrevReg = ...
//   %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
//
// bb.current
//   %CurReg = ...
//   %CurRegCopy:sreg_32(s1) = COPY %CurReg
//   ...
//   %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes to 0
//   %CurMaskedReg:sreg_32(s1)  = AND %ExecReg, CurRegCopy - inactive lanes to 0
//   %DstReg:sreg_32(s1)        = OR %PrevMaskedReg, CurMaskedReg
//
// DstReg = for active lanes, rewrite the bit in PrevReg with the bit from
// CurReg.
void DivergenceLoweringHelper::buildMergeLaneMasks(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
    Register DstReg, Register PrevReg, Register CurReg) {
  // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
  // TODO: check if inputs are constants or results of a compare.

  Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
  Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
  Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
  Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);

  B.setInsertPt(MBB, I);
  B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
  B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
  B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
}

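// A small worked example of the merge above, with illustrative mask values for
// a hypothetical 4-lane wave:
//   EXEC = 0b1100, PrevReg = 0b1010, CurReg = 0b0110
//   PrevMaskedReg = PrevReg & ~EXEC = 0b1010 & 0b0011 = 0b0010
//   CurMaskedReg  = CurReg  &  EXEC = 0b0110 & 0b1100 = 0b0100
//   DstReg        = 0b0010 | 0b0100 = 0b0110
// Active lanes (bits 2 and 3) take their bit from CurReg, inactive lanes keep
// the bit from PrevReg.
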
// GlobalISel has to constrain the S1 incoming value, taken as-is, to the lane
// mask register class. Insert a copy of Incoming.Reg to a new lane mask inside
// Incoming.Block; Incoming.Reg becomes that new lane mask.
void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) {
  B.setInsertPt(*In.Block, In.Block->getFirstTerminator());

  auto Copy = B.buildCopy(LLT::scalar(1), In.Reg);
  MRI->setRegClass(Copy.getReg(0), ST->getBoolRC());
  In.Reg = Copy.getReg(0);
}

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                      "AMDGPU GlobalISel divergence lowering", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                    "AMDGPU GlobalISel divergence lowering", false, false)

char AMDGPUGlobalISelDivergenceLowering::ID = 0;

char &llvm::AMDGPUGlobalISelDivergenceLoweringID =
    AMDGPUGlobalISelDivergenceLowering::ID;

FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
  return new AMDGPUGlobalISelDivergenceLowering();
}

bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
    MachineFunction &MF) {
  MachineDominatorTree &DT =
      getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  MachinePostDominatorTree &PDT =
      getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
  MachineUniformityInfo &MUI =
      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();

  DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);

  return Helper.lowerPhis();
}