LLVM 23.0.0git
GCNVOPDUtils.cpp
Go to the documentation of this file.
1//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains the AMDGPU DAG scheduling
10/// mutation to pair VOPD instructions back to back. It also contains
11/// subroutines useful in the creation of VOPD instructions.
12//
13//===----------------------------------------------------------------------===//
14
15#include "GCNVOPDUtils.h"
16#include "AMDGPUSubtarget.h"
17#include "GCNSubtarget.h"
19#include "SIInstrInfo.h"
21#include "llvm/ADT/STLExtras.h"
31#include "llvm/MC/MCInst.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "gcn-vopd-utils"
36
38 const MachineInstr &MIX,
39 const MachineInstr &MIY, bool IsVOPD3,
40 bool AllowSameVGPR) {
41 namespace VOPD = AMDGPU::VOPD;
42
43 const MachineFunction *MF = MIX.getMF();
44 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
45
46 if (IsVOPD3 && !ST.hasVOPD3())
47 return false;
48 if (!IsVOPD3 && (TII.isVOP3(MIX) || TII.isVOP3(MIY)))
49 return false;
50 if (TII.isDPP(MIX) || TII.isDPP(MIY))
51 return false;
52
53 const SIRegisterInfo *TRI = ST.getRegisterInfo();
54 const MachineRegisterInfo &MRI = MF->getRegInfo();
55 // Literals also count against scalar bus limit
57 auto addLiteral = [&](const MachineOperand &Op) {
58 for (auto &Literal : UniqueLiterals) {
59 if (Literal->isIdenticalTo(Op))
60 return;
61 }
62 UniqueLiterals.push_back(&Op);
63 };
64 SmallSet<Register, 4> UniqueScalarRegs;
65
66 auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
67 const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? MIX : MIY;
68 const MachineOperand &Operand = MI.getOperand(OperandIdx);
69 if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
70 return Operand.getReg();
71 return Register();
72 };
73
74 auto InstInfo = AMDGPU::getVOPDInstInfo(MIX.getDesc(), MIY.getDesc());
75
76 for (auto CompIdx : VOPD::COMPONENTS) {
77 const MachineInstr &MI = (CompIdx == VOPD::X) ? MIX : MIY;
78
79 const MachineOperand &Src0 = *TII.getNamedOperand(MI, AMDGPU::OpName::src0);
80 if (Src0.isReg()) {
81 if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
82 UniqueScalarRegs.insert(Src0.getReg());
83 }
84 } else if (!TII.isInlineConstant(Src0)) {
85 if (IsVOPD3)
86 return false;
87 addLiteral(Src0);
88 }
89
90 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
91 if (IsVOPD3)
92 return false;
93
94 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
95 addLiteral(MI.getOperand(CompOprIdx));
96 }
97 if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
98 UniqueScalarRegs.insert(AMDGPU::VCC_LO);
99
100 if (IsVOPD3) {
101 for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
102 const MachineOperand *Src = TII.getNamedOperand(MI, OpName);
103 if (!Src)
104 continue;
105 if (OpName == AMDGPU::OpName::src2) {
106 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3))
107 continue;
108 if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
109 UniqueScalarRegs.insert(Src->getReg());
110 continue;
111 }
112 }
113 if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg()))
114 return false;
115 }
116
117 for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
118 AMDGPU::OpName::op_sel}) {
119 if (TII.hasModifiersSet(MI, OpName))
120 return false;
121 }
122
123 // Neg is allowed, other modifiers are not. NB: even though sext has the
124 // same value as neg, there are no combinable instructions with sext.
125 for (auto OpName :
126 {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
127 AMDGPU::OpName::src2_modifiers}) {
128 const MachineOperand *Mods = TII.getNamedOperand(MI, OpName);
129 if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
130 return false;
131 }
132 }
133 }
134
135 if (UniqueLiterals.size() > 1)
136 return false;
137 if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
138 return false;
139
140 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
141 // source-cache.
142 bool SkipSrc = (ST.hasGFX11_7Insts() || ST.hasGFX12Insts()) &&
143 MIX.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
144 MIY.getOpcode() == AMDGPU::V_MOV_B32_e32;
145
146 if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR,
147 IsVOPD3))
148 return false;
149
150 if (IsVOPD3) {
151 // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
152 // MIX check is only relevant to scheduling?
153 if (AMDGPU::hasNamedOperand(MIX.getOpcode(), AMDGPU::OpName::bitop3)) {
154 const MachineOperand &Src2 =
155 *TII.getNamedOperand(MIX, AMDGPU::OpName::src2);
156 if (!Src2.isImm() || Src2.getImm())
157 return false;
158 }
159 if (AMDGPU::hasNamedOperand(MIY.getOpcode(), AMDGPU::OpName::bitop3)) {
160 const MachineOperand &Src2 =
161 *TII.getNamedOperand(MIY, AMDGPU::OpName::src2);
162 if (!Src2.isImm() || Src2.getImm())
163 return false;
164 }
165 }
166
167 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << MIX
168 << "\n\tY: " << MIY << "\n");
169 return true;
170}
171
172/// Core pair-eligibility check for a single VOPD encoding variant (VOPD or
173/// VOPD3). Returns the X/Y assignment on success, or std::nullopt otherwise.
174static std::optional<VOPDMatchInfo>
175tryMatchVOPDPairVariant(const SIInstrInfo &TII, unsigned EncodingFamily,
176 MachineInstr &FirstMI, MachineInstr &SecondMI,
177 bool IsVOPD3) {
178 unsigned Opc = FirstMI.getOpcode();
179 unsigned Opc2 = SecondMI.getOpcode();
180 AMDGPU::CanBeVOPD FirstCanBeVOPD =
181 AMDGPU::getCanBeVOPD(Opc, EncodingFamily, IsVOPD3);
182 AMDGPU::CanBeVOPD SecondCanBeVOPD =
183 AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, IsVOPD3);
184
185 if (!(FirstCanBeVOPD.X && SecondCanBeVOPD.Y) &&
186 !(FirstCanBeVOPD.Y && SecondCanBeVOPD.X))
187 return std::nullopt;
188
189 // If SecondMI depends on FirstMI they cannot execute at the same time.
190 if (TII.hasRAWDependency(FirstMI, SecondMI))
191 return std::nullopt;
192
193 const GCNSubtarget &ST = TII.getSubtarget();
194 bool AllowSameVGPR = ST.hasGFX12Insts();
195
196 if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y) {
197 if (checkVOPDRegConstraints(TII, FirstMI, SecondMI, IsVOPD3, AllowSameVGPR))
198 return VOPDMatchInfo{&FirstMI, &SecondMI, IsVOPD3};
199 }
200
201 if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X) {
202 // AllowSameVGPR relaxes the VGPR bank overlap check for source operands.
203 // Only enable it when there is no antidependency.
204 bool IsAntiDep = TII.hasRAWDependency(SecondMI, FirstMI);
205 AllowSameVGPR &= !IsAntiDep;
206 if (IsAntiDep && !TII.isVOPDAntidependencyAllowed(SecondMI))
207 return std::nullopt;
208 if (checkVOPDRegConstraints(TII, SecondMI, FirstMI, IsVOPD3, AllowSameVGPR))
209 return VOPDMatchInfo{&SecondMI, &FirstMI, IsVOPD3};
210 }
211
212 return std::nullopt;
213}
214
215std::optional<VOPDMatchInfo> llvm::tryMatchVOPDPair(const SIInstrInfo &TII,
216 MachineInstr &FirstMI,
217 MachineInstr &SecondMI) {
218 const GCNSubtarget &ST = TII.getSubtarget();
219 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
220 if (auto Match = tryMatchVOPDPairVariant(TII, EncodingFamily, FirstMI,
221 SecondMI, /*IsVOPD3=*/false))
222 return Match;
223 if (ST.hasVOPD3())
224 return tryMatchVOPDPairVariant(TII, EncodingFamily, FirstMI, SecondMI,
225 /*IsVOPD3=*/true);
226 return std::nullopt;
227}
228
229/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
230/// together. Given SecondMI, when FirstMI is unspecified, then check if
231/// SecondMI may be part of a fused pair at all.
233 const TargetSubtargetInfo &TSI,
234 const MachineInstr *FirstMI,
235 const MachineInstr &SecondMI) {
236 const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
237 const GCNSubtarget &ST = STII.getSubtarget();
238
239 // One instruction case: just check whether SecondMI is eligible at all.
240 if (!FirstMI) {
241 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
242 unsigned Opc2 = SecondMI.getOpcode();
243 auto checkCanBeVOPD = [&](bool VOPD3) {
244 AMDGPU::CanBeVOPD CanBeVOPD =
245 AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
246 return CanBeVOPD.Y || CanBeVOPD.X;
247 };
248 return checkCanBeVOPD(false) || (ST.hasVOPD3() && checkCanBeVOPD(true));
249 }
250
251#ifdef EXPENSIVE_CHECKS
252 assert([&]() -> bool {
253 for (auto MII = MachineBasicBlock::const_iterator(FirstMI);
254 MII != FirstMI->getParent()->instr_end(); ++MII) {
255 if (&*MII == &SecondMI)
256 return true;
257 }
258 return false;
259 }() && "Expected FirstMI to precede SecondMI");
260#endif
261
262 return tryMatchVOPDPair(STII, *const_cast<MachineInstr *>(FirstMI),
263 const_cast<MachineInstr &>(SecondMI))
264 .has_value();
265}
266
267namespace {
268/// Adapts design from MacroFusion
269/// Puts valid candidate instructions back-to-back so they can easily
270/// be turned into VOPD instructions
271/// Greedily pairs instruction candidates. O(n^2) algorithm.
272struct VOPDPairingMutation : ScheduleDAGMutation {
273 MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
274
275 VOPDPairingMutation(
276 MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
278
279 void apply(ScheduleDAGInstrs *DAG) override {
280 const TargetInstrInfo &TII = *DAG->TII;
281 const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
282 if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
283 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
284 return;
285 }
286
287 std::vector<SUnit>::iterator ISUI, JSUI;
288 for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
289 const MachineInstr *IMI = ISUI->getInstr();
290 if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
291 continue;
292 if (!hasLessThanNumFused(*ISUI, 2))
293 continue;
294
295 for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
296 if (JSUI->isBoundaryNode())
297 continue;
298 const MachineInstr *JMI = JSUI->getInstr();
299 if (!hasLessThanNumFused(*JSUI, 2) ||
300 !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
301 continue;
302 if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
303 break;
304 }
305 }
306 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
307 }
308};
309} // namespace
310
311std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
312 return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
313}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
AMD GCN specific subclass of TargetSubtarget.
static std::optional< VOPDMatchInfo > tryMatchVOPDPairVariant(const SIInstrInfo &TII, unsigned EncodingFamily, MachineInstr &FirstMI, MachineInstr &SecondMI, bool IsVOPD3)
Core pair-eligibility check for a single VOPD encoding variant (VOPD or VOPD3).
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
Interface definition for SIInstrInfo.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
MachineInstrBundleIterator< const MachineInstr > const_iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GCNSubtarget & getSubtarget() const
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
size_type size() const
Definition SmallSet.h:171
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This is an optimization pass for GlobalISel generic memory operations.
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
LLVM_ABI bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI, bool IsVOPD3, bool AllowSameVGPR)
DWARFExpression::Operation Op
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Definition MacroFusion.h:33
std::optional< VOPDMatchInfo > tryMatchVOPDPair(const SIInstrInfo &TII, MachineInstr &FirstMI, MachineInstr &SecondMI)
Check whether FirstMI and SecondMI can be combined into a VOPD instruction.
LLVM_ABI bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.
Describes a matched VOPD pair: which instruction is the X component and which is the Y component,...