26#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
42 return "AMDGPU GlobalISel divergence lowering";
66 void markAsLaneMask(
Register DstReg)
const override;
67 void getCandidatesForLowering(
69 void collectIncomingValuesFromPhi(
80 bool lowerTemporalDivergence();
81 bool lowerTemporalDivergenceI1();
84DivergenceLoweringHelper::DivergenceLoweringHelper(
87 : PhiLoweringHelper(MF, DT, PDT), MUI(MUI),
B(*MF) {}
90void DivergenceLoweringHelper::markAsLaneMask(
Register DstReg)
const {
93 if (MRI->getRegClassOrNull(DstReg)) {
94 if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
99 MRI->setRegClass(DstReg, ST->getBoolRC());
102void DivergenceLoweringHelper::getCandidatesForLowering(
111 if (
MI.getOpcode() != TargetOpcode::G_PHI)
120void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
123 for (
unsigned i = 1; i <
MI->getNumOperands(); i += 2) {
129void DivergenceLoweringHelper::replaceDstReg(
Register NewReg,
Register OldReg,
141 B.setInsertPt(*
MBB,
MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
142 B.buildCopy(LaneMask,
Reg);
169void DivergenceLoweringHelper::buildMergeLaneMasks(
175 Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
176 Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
180 B.setInsertPt(
MBB,
I);
181 B.buildInstr(LMC->AndN2Opc, {PrevMaskedReg}, {PrevRegCopy, LMC->ExecReg});
182 B.buildInstr(LMC->AndOpc, {CurMaskedReg}, {LMC->ExecReg, CurRegCopy});
183 B.buildInstr(LMC->OrOpc, {DstReg}, {PrevMaskedReg, CurMaskedReg});
190 B.setInsertPt(*In.Block, In.Block->getFirstTerminator());
193 MRI->setRegClass(Copy.getReg(0), ST->getBoolRC());
194 In.Reg = Copy.getReg(0);
200 if (
Op.isReg() &&
Op.getReg() ==
Reg)
205bool DivergenceLoweringHelper::lowerTemporalDivergence() {
216 replaceUsesOfRegInInstWith(
Reg, UseInst, CachedTDCopy);
224 Register VgprReg = MRI->createGenericVirtualRegister(MRI->getType(
Reg));
225 B.buildInstr(AMDGPU::COPY, {VgprReg}, {
Reg})
228 replaceUsesOfRegInInstWith(
Reg, UseInst, VgprReg);
229 TDCache[
Reg] = VgprReg;
234bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
236 initializeLaneMaskRegisterAttributes(BoolS1);
247 auto &CycleMergedMask = LRCCacheIter->getSecond();
249 if (RegNotCached || LRC->contains(CachedLRC)) {
254 for (
auto &LRCCacheEntry : LRCCache) {
256 auto &CycleMergedMask = LRCCacheEntry.getSecond();
259 Register MergedMask = MRI->createVirtualRegister(BoolS1);
265 for (
auto Entry :
Cycle->getEntries()) {
267 if (!
Cycle->contains(Pred)) {
268 B.setInsertPt(*Pred, Pred->getFirstTerminator());
269 auto ImplDef =
B.buildInstr(AMDGPU::IMPLICIT_DEF, {BoolS1}, {});
275 buildMergeLaneMasks(*
MBB,
MBB->getFirstTerminator(), {}, MergedMask,
278 CycleMergedMask.second = MergedMask;
285 replaceUsesOfRegInInstWith(
Reg, UseInst, LRCCache.
lookup(
Reg).second);
294 "AMDGPU GlobalISel divergence lowering",
false,
false)
301char AMDGPUGlobalISelDivergenceLowering::
ID = 0;
304 AMDGPUGlobalISelDivergenceLowering::
ID;
307 return new AMDGPUGlobalISelDivergenceLowering();
310bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
313 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
315 getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
317 getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
319 DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
329 Changed |= Helper.lowerTemporalDivergence();
332 Changed |= Helper.lowerTemporalDivergenceI1();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
This file declares the MachineIRBuilder class.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Interface definition of the PhiLoweringHelper class that implements lane mask merging algorithm for d...
bool isS32S64LaneMask(Register Reg) const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
FunctionPass class - This class is used to implement most global optimizations.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Helper class to build MachineInstr.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
MachineOperand class - Representation of each machine instruction operand.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
Wrapper class representing virtual and physical registers.
Helper class for SSA formation on a set of values defined in multiple blocks.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Register createLaneMaskReg(MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
char & AMDGPUGlobalISelDivergenceLoweringID
DWARFExpression::Operation Op
FunctionPass * createAMDGPUGlobalISelDivergenceLoweringPass()
MachineCycleInfo::CycleT MachineCycle
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
All attributes(register class or bank and low-level type) a virtual register can have.