LLVM 23.0.0git
GCNVOPDUtils.cpp
Go to the documentation of this file.
1//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains the AMDGPU DAG scheduling
10/// mutation to pair VOPD instructions back to back. It also contains
11/// subroutines useful in the creation of VOPD instructions.
12//
13//===----------------------------------------------------------------------===//
14
#include "GCNVOPDUtils.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCInst.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "gcn-vopd-utils"
36
38 const MachineInstr &MIX,
39 const MachineInstr &MIY, bool IsVOPD3,
40 bool AllowSameVGPR) {
41 namespace VOPD = AMDGPU::VOPD;
42
43 const MachineFunction *MF = MIX.getMF();
44 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
45
46 if (IsVOPD3 && !ST.hasVOPD3())
47 return false;
48 if (!IsVOPD3 && (TII.isVOP3(MIX) || TII.isVOP3(MIY)))
49 return false;
50 if (TII.isDPP(MIX) || TII.isDPP(MIY))
51 return false;
52
53 const SIRegisterInfo *TRI = ST.getRegisterInfo();
54 const MachineRegisterInfo &MRI = MF->getRegInfo();
55 // Literals also count against scalar bus limit
57 auto addLiteral = [&](const MachineOperand &Op) {
58 for (auto &Literal : UniqueLiterals) {
59 if (Literal->isIdenticalTo(Op))
60 return;
61 }
62 UniqueLiterals.push_back(&Op);
63 };
64 SmallSet<Register, 4> UniqueScalarRegs;
65
66 auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
67 const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? MIX : MIY;
68 const MachineOperand &Operand = MI.getOperand(OperandIdx);
69 if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
70 return Operand.getReg();
71 return Register();
72 };
73
74 auto InstInfo = AMDGPU::getVOPDInstInfo(MIX.getDesc(), MIY.getDesc());
75
76 for (auto CompIdx : VOPD::COMPONENTS) {
77 const MachineInstr &MI = (CompIdx == VOPD::X) ? MIX : MIY;
78
79 const MachineOperand &Src0 = *TII.getNamedOperand(MI, AMDGPU::OpName::src0);
80 if (Src0.isReg()) {
81 if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
82 UniqueScalarRegs.insert(Src0.getReg());
83 }
84 } else if (!TII.isInlineConstant(Src0)) {
85 if (IsVOPD3)
86 return false;
87 addLiteral(Src0);
88 }
89
90 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
91 if (IsVOPD3)
92 return false;
93
94 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
95 addLiteral(MI.getOperand(CompOprIdx));
96 }
97 if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
98 UniqueScalarRegs.insert(AMDGPU::VCC_LO);
99
100 if (IsVOPD3) {
101 for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
102 const MachineOperand *Src = TII.getNamedOperand(MI, OpName);
103 if (!Src)
104 continue;
105 if (OpName == AMDGPU::OpName::src2) {
106 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3))
107 continue;
108 if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
109 UniqueScalarRegs.insert(Src->getReg());
110 continue;
111 }
112 }
113 if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg()))
114 return false;
115 }
116
117 for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
118 AMDGPU::OpName::op_sel}) {
119 if (TII.hasModifiersSet(MI, OpName))
120 return false;
121 }
122
123 // Neg is allowed, other modifiers are not. NB: even though sext has the
124 // same value as neg, there are no combinable instructions with sext.
125 for (auto OpName :
126 {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
127 AMDGPU::OpName::src2_modifiers}) {
128 const MachineOperand *Mods = TII.getNamedOperand(MI, OpName);
129 if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
130 return false;
131 }
132 }
133 }
134
135 if (UniqueLiterals.size() > 1)
136 return false;
137 if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
138 return false;
139
140 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
141 // source-cache.
142 bool SkipSrc = (ST.hasGFX11_7Insts() || ST.hasGFX12Insts()) &&
143 MIX.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
144 MIY.getOpcode() == AMDGPU::V_MOV_B32_e32;
145
146 if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR,
147 IsVOPD3))
148 return false;
149
150 if (IsVOPD3) {
151 // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
152 // MIX check is only relevant to scheduling?
153 if (AMDGPU::hasNamedOperand(MIX.getOpcode(), AMDGPU::OpName::bitop3)) {
154 const MachineOperand &Src2 =
155 *TII.getNamedOperand(MIX, AMDGPU::OpName::src2);
156 if (!Src2.isImm() || Src2.getImm())
157 return false;
158 }
159 if (AMDGPU::hasNamedOperand(MIY.getOpcode(), AMDGPU::OpName::bitop3)) {
160 const MachineOperand &Src2 =
161 *TII.getNamedOperand(MIY, AMDGPU::OpName::src2);
162 if (!Src2.isImm() || Src2.getImm())
163 return false;
164 }
165 }
166
167 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << MIX
168 << "\n\tY: " << MIY << "\n");
169 return true;
170}
171
172/// Core pair-eligibility check for a single VOPD encoding variant (VOPD or
173/// VOPD3). Returns the X/Y assignment on success, or std::nullopt otherwise.
174static std::optional<VOPDMatchInfo>
175tryMatchVOPDPairVariant(const SIInstrInfo &TII, unsigned EncodingFamily,
176 MachineInstr &FirstMI, MachineInstr &SecondMI,
177 bool IsVOPD3) {
178 unsigned Opc = FirstMI.getOpcode();
179 unsigned Opc2 = SecondMI.getOpcode();
180 AMDGPU::CanBeVOPD FirstCanBeVOPD =
181 AMDGPU::getCanBeVOPD(Opc, EncodingFamily, IsVOPD3);
182 AMDGPU::CanBeVOPD SecondCanBeVOPD =
183 AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, IsVOPD3);
184
185 // If SecondMI depends on FirstMI they cannot execute at the same time.
186 if (TII.hasRAWDependency(FirstMI, SecondMI))
187 return std::nullopt;
188
189 const GCNSubtarget &ST = TII.getSubtarget();
190 bool AllowSameVGPR = ST.hasGFX12Insts();
191
192 if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y) {
193 if (checkVOPDRegConstraints(TII, FirstMI, SecondMI, IsVOPD3, AllowSameVGPR))
194 return VOPDMatchInfo{&FirstMI, &SecondMI, IsVOPD3};
195 }
196
197 // AllowSameVGPR relaxes the VGPR bank overlap check for source operands.
198 // Only enable it when there is no antidependency.
199 bool IsAntiDep = TII.hasRAWDependency(SecondMI, FirstMI);
200 AllowSameVGPR &= !IsAntiDep;
201
202 if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X) {
203 if (IsAntiDep && !TII.isVOPDAntidependencyAllowed(SecondMI))
204 return std::nullopt;
205 if (checkVOPDRegConstraints(TII, SecondMI, FirstMI, IsVOPD3, AllowSameVGPR))
206 return VOPDMatchInfo{&SecondMI, &FirstMI, IsVOPD3};
207 }
208
209 return std::nullopt;
210}
211
212std::optional<VOPDMatchInfo> llvm::tryMatchVOPDPair(const SIInstrInfo &TII,
213 MachineInstr &FirstMI,
214 MachineInstr &SecondMI) {
215 const GCNSubtarget &ST = TII.getSubtarget();
216 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
217 if (auto Match = tryMatchVOPDPairVariant(TII, EncodingFamily, FirstMI,
218 SecondMI, /*IsVOPD3=*/false))
219 return Match;
220 if (ST.hasVOPD3())
221 return tryMatchVOPDPairVariant(TII, EncodingFamily, FirstMI, SecondMI,
222 /*IsVOPD3=*/true);
223 return std::nullopt;
224}
225
226/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
227/// together. Given SecondMI, when FirstMI is unspecified, then check if
228/// SecondMI may be part of a fused pair at all.
230 const TargetSubtargetInfo &TSI,
231 const MachineInstr *FirstMI,
232 const MachineInstr &SecondMI) {
233 const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
234 const GCNSubtarget &ST = STII.getSubtarget();
235
236 // One instruction case: just check whether SecondMI is eligible at all.
237 if (!FirstMI) {
238 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
239 unsigned Opc2 = SecondMI.getOpcode();
240 auto checkCanBeVOPD = [&](bool VOPD3) {
241 AMDGPU::CanBeVOPD CanBeVOPD =
242 AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
243 return CanBeVOPD.Y || CanBeVOPD.X;
244 };
245 return checkCanBeVOPD(false) || (ST.hasVOPD3() && checkCanBeVOPD(true));
246 }
247
248#ifdef EXPENSIVE_CHECKS
249 assert([&]() -> bool {
250 for (auto MII = MachineBasicBlock::const_iterator(FirstMI);
251 MII != FirstMI->getParent()->instr_end(); ++MII) {
252 if (&*MII == &SecondMI)
253 return true;
254 }
255 return false;
256 }() && "Expected FirstMI to precede SecondMI");
257#endif
258
259 return tryMatchVOPDPair(STII, *const_cast<MachineInstr *>(FirstMI),
260 const_cast<MachineInstr &>(SecondMI))
261 .has_value();
262}
263
264namespace {
265/// Adapts design from MacroFusion
266/// Puts valid candidate instructions back-to-back so they can easily
267/// be turned into VOPD instructions
268/// Greedily pairs instruction candidates. O(n^2) algorithm.
269struct VOPDPairingMutation : ScheduleDAGMutation {
270 MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
271
272 VOPDPairingMutation(
273 MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
275
276 void apply(ScheduleDAGInstrs *DAG) override {
277 const TargetInstrInfo &TII = *DAG->TII;
278 const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
279 if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
280 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
281 return;
282 }
283
284 std::vector<SUnit>::iterator ISUI, JSUI;
285 for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
286 const MachineInstr *IMI = ISUI->getInstr();
287 if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
288 continue;
289 if (!hasLessThanNumFused(*ISUI, 2))
290 continue;
291
292 for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
293 if (JSUI->isBoundaryNode())
294 continue;
295 const MachineInstr *JMI = JSUI->getInstr();
296 if (!hasLessThanNumFused(*JSUI, 2) ||
297 !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
298 continue;
299 if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
300 break;
301 }
302 }
303 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
304 }
305};
306} // namespace
307
308std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
309 return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
310}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
AMD GCN specific subclass of TargetSubtarget.
static std::optional< VOPDMatchInfo > tryMatchVOPDPairVariant(const SIInstrInfo &TII, unsigned EncodingFamily, MachineInstr &FirstMI, MachineInstr &SecondMI, bool IsVOPD3)
Core pair-eligibility check for a single VOPD encoding variant (VOPD or VOPD3).
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
Interface definition for SIInstrInfo.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
MachineInstrBundleIterator< const MachineInstr > const_iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GCNSubtarget & getSubtarget() const
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
size_type size() const
Definition SmallSet.h:171
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This is an optimization pass for GlobalISel generic memory operations.
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
LLVM_ABI bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI, bool IsVOPD3, bool AllowSameVGPR)
DWARFExpression::Operation Op
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Definition MacroFusion.h:33
std::optional< VOPDMatchInfo > tryMatchVOPDPair(const SIInstrInfo &TII, MachineInstr &FirstMI, MachineInstr &SecondMI)
Check whether FirstMI and SecondMI can be combined into a VOPD instruction.
LLVM_ABI bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.
Describes a matched VOPD pair: which instruction is the X component and which is the Y component,...