LLVM 23.0.0git
GCNVOPDUtils.cpp
Go to the documentation of this file.
1//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains the AMDGPU DAG scheduling
10/// mutation to pair VOPD instructions back to back. It also contains
11/// subroutines useful in the creation of VOPD instructions.
12//
13//===----------------------------------------------------------------------===//
14
15#include "GCNVOPDUtils.h"
16#include "AMDGPUSubtarget.h"
17#include "GCNSubtarget.h"
19#include "SIInstrInfo.h"
21#include "llvm/ADT/STLExtras.h"
31#include "llvm/MC/MCInst.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "gcn-vopd-utils"
36
38 const MachineInstr &MIX,
39 const MachineInstr &MIY, bool IsVOPD3,
40 bool AllowSameVGPR) {
41 namespace VOPD = AMDGPU::VOPD;
42
43 const MachineFunction *MF = MIX.getMF();
44 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
45
46 if (IsVOPD3 && !ST.hasVOPD3())
47 return false;
48 if (!IsVOPD3 && (TII.isVOP3(MIX) || TII.isVOP3(MIY)))
49 return false;
50 if (TII.isDPP(MIX) || TII.isDPP(MIY))
51 return false;
52
53 const SIRegisterInfo *TRI = ST.getRegisterInfo();
54 const MachineRegisterInfo &MRI = MF->getRegInfo();
55 // Literals also count against scalar bus limit
57 auto addLiteral = [&](const MachineOperand &Op) {
58 for (auto &Literal : UniqueLiterals) {
59 if (Literal->isIdenticalTo(Op))
60 return;
61 }
62 UniqueLiterals.push_back(&Op);
63 };
64 SmallSet<Register, 4> UniqueScalarRegs;
65
66 auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
67 const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? MIX : MIY;
68 const MachineOperand &Operand = MI.getOperand(OperandIdx);
69 if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
70 return Operand.getReg();
71 return Register();
72 };
73
74 auto InstInfo = AMDGPU::getVOPDInstInfo(MIX.getDesc(), MIY.getDesc());
75
76 for (auto CompIdx : VOPD::COMPONENTS) {
77 const MachineInstr &MI = (CompIdx == VOPD::X) ? MIX : MIY;
78
79 const MachineOperand &Src0 = *TII.getNamedOperand(MI, AMDGPU::OpName::src0);
80 if (Src0.isReg()) {
81 if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
82 UniqueScalarRegs.insert(Src0.getReg());
83 }
84 } else if (!TII.isInlineConstant(Src0)) {
85 if (IsVOPD3)
86 return false;
87 addLiteral(Src0);
88 }
89
90 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
91 if (IsVOPD3)
92 return false;
93
94 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
95 addLiteral(MI.getOperand(CompOprIdx));
96 }
97 if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
98 UniqueScalarRegs.insert(AMDGPU::VCC_LO);
99
100 if (IsVOPD3) {
101 for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
102 const MachineOperand *Src = TII.getNamedOperand(MI, OpName);
103 if (!Src)
104 continue;
105 if (OpName == AMDGPU::OpName::src2) {
106 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3))
107 continue;
108 if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
109 UniqueScalarRegs.insert(Src->getReg());
110 continue;
111 }
112 }
113 if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg()))
114 return false;
115 }
116
117 for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
118 AMDGPU::OpName::op_sel}) {
119 if (TII.hasModifiersSet(MI, OpName))
120 return false;
121 }
122
123 // Neg is allowed, other modifiers are not. NB: even though sext has the
124 // same value as neg, there are no combinable instructions with sext.
125 for (auto OpName :
126 {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
127 AMDGPU::OpName::src2_modifiers}) {
128 const MachineOperand *Mods = TII.getNamedOperand(MI, OpName);
129 if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
130 return false;
131 }
132 }
133 }
134
135 if (UniqueLiterals.size() > 1)
136 return false;
137 if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
138 return false;
139
140 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
141 // source-cache.
142 bool SkipSrc = (ST.hasGFX11_7Insts() || ST.hasGFX12Insts()) &&
143 MIX.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
144 MIY.getOpcode() == AMDGPU::V_MOV_B32_e32;
145
146 if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR,
147 IsVOPD3))
148 return false;
149
150 if (IsVOPD3) {
151 // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
152 // MIX check is only relevant to scheduling?
153 if (AMDGPU::hasNamedOperand(MIX.getOpcode(), AMDGPU::OpName::bitop3)) {
154 const MachineOperand &Src2 =
155 *TII.getNamedOperand(MIX, AMDGPU::OpName::src2);
156 if (!Src2.isImm() || Src2.getImm())
157 return false;
158 }
159 if (AMDGPU::hasNamedOperand(MIY.getOpcode(), AMDGPU::OpName::bitop3)) {
160 const MachineOperand &Src2 =
161 *TII.getNamedOperand(MIY, AMDGPU::OpName::src2);
162 if (!Src2.isImm() || Src2.getImm())
163 return false;
164 }
165 }
166
167 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << MIX
168 << "\n\tY: " << MIY << "\n");
169 return true;
170}
171
172/// Core pair-eligibility check for a single VOPD encoding variant (VOPD or
173/// VOPD3). Returns the X/Y assignment on success, or std::nullopt otherwise.
174static std::optional<VOPDMatchInfo>
175tryMatchVOPDPairVariant(const SIInstrInfo &TII, unsigned EncodingFamily,
176 MachineInstr &FirstMI, MachineInstr &SecondMI,
177 bool IsVOPD3) {
178 unsigned Opc = FirstMI.getOpcode();
179 unsigned Opc2 = SecondMI.getOpcode();
180 AMDGPU::CanBeVOPD FirstCanBeVOPD =
181 AMDGPU::getCanBeVOPD(Opc, EncodingFamily, IsVOPD3);
182 AMDGPU::CanBeVOPD SecondCanBeVOPD =
183 AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, IsVOPD3);
184
185 if (!(FirstCanBeVOPD.X && SecondCanBeVOPD.Y) &&
186 !(FirstCanBeVOPD.Y && SecondCanBeVOPD.X))
187 return std::nullopt;
188
189 // If SecondMI depends on FirstMI they cannot execute at the same time.
190 if (TII.hasRAWDependency(FirstMI, SecondMI))
191 return std::nullopt;
192
193 const GCNSubtarget &ST = TII.getSubtarget();
194 bool AllowSameVGPR = ST.hasGFX12Insts();
195
196 if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y) {
197 if (checkVOPDRegConstraints(TII, FirstMI, SecondMI, IsVOPD3, AllowSameVGPR))
198 return VOPDMatchInfo{&FirstMI, &SecondMI, IsVOPD3};
199 }
200
201 if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X) {
202 // AllowSameVGPR relaxes the VGPR bank overlap check for source operands.
203 // Only enable it when there is no antidependency.
204 bool IsAntiDep = TII.hasRAWDependency(SecondMI, FirstMI);
205 AllowSameVGPR &= !IsAntiDep;
206 if (IsAntiDep && !TII.isVOPDAntidependencyAllowed(SecondMI))
207 return std::nullopt;
208 if (checkVOPDRegConstraints(TII, SecondMI, FirstMI, IsVOPD3, AllowSameVGPR))
209 return VOPDMatchInfo{&SecondMI, &FirstMI, IsVOPD3};
210 }
211
212 return std::nullopt;
213}
214
215std::optional<VOPDMatchInfo> llvm::tryMatchVOPDPair(const SIInstrInfo &TII,
216 MachineInstr &FirstMI,
217 MachineInstr &SecondMI) {
218 const GCNSubtarget &ST = TII.getSubtarget();
219 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
220 if (auto Match = tryMatchVOPDPairVariant(TII, EncodingFamily, FirstMI,
221 SecondMI, /*IsVOPD3=*/false))
222 return Match;
223 if (ST.hasVOPD3())
224 return tryMatchVOPDPairVariant(TII, EncodingFamily, FirstMI, SecondMI,
225 /*IsVOPD3=*/true);
226 return std::nullopt;
227}
228
229/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
230/// together. Given SecondMI, when FirstMI is unspecified, then check if
231/// SecondMI may be part of a fused pair at all.
233 const TargetSubtargetInfo &TSI,
234 const MachineInstr *FirstMI,
235 const MachineInstr &SecondMI) {
236 const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
237 const GCNSubtarget &ST = STII.getSubtarget();
238
239 // One instruction case: just check whether SecondMI is eligible at all.
240 if (!FirstMI) {
241 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
242 unsigned Opc2 = SecondMI.getOpcode();
243 auto checkCanBeVOPD = [&](bool VOPD3) {
244 AMDGPU::CanBeVOPD CanBeVOPD =
245 AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
246 return CanBeVOPD.Y || CanBeVOPD.X;
247 };
248 return checkCanBeVOPD(false) || (ST.hasVOPD3() && checkCanBeVOPD(true));
249 }
250
251#ifdef EXPENSIVE_CHECKS
252 assert([&]() -> bool {
253 for (auto MII = MachineBasicBlock::const_iterator(FirstMI);
254 MII != FirstMI->getParent()->instr_end(); ++MII) {
255 if (&*MII == &SecondMI)
256 return true;
257 }
258 return false;
259 }() && "Expected FirstMI to precede SecondMI");
260#endif
261
262 return tryMatchVOPDPair(STII, *const_cast<MachineInstr *>(FirstMI),
263 const_cast<MachineInstr &>(SecondMI))
264 .has_value();
265}
266
267/// Collect all load (dependents if \p Forward else dependencies) that connect
268/// to the \p Head SU.
269/// \p Visited should allocate enough bits for the number of SUnits, but its
270/// value can otherwise be uninitialized.
271static void collectLoads(SmallPtrSet<SUnit *, 8> &Loads, BitVector &Visited,
272 SUnit &Head, bool Forward, bool StopAtLoads) {
273 if (Head.isBoundaryNode())
274 return;
275
276 Visited.reset();
277
279 Stack.push_back(&Head);
280 while (!Stack.empty()) {
281 SUnit *SU = Stack.pop_back_val();
282 const SmallVector<SDep, 4> &Deps = Forward ? SU->Succs : SU->Preds;
283 for (const SDep &Edge : Deps) {
284 if (StopAtLoads && Edge.getKind() != SDep::Data)
285 continue;
286 SUnit *Dep = Edge.getSUnit();
287 if (Dep->isBoundaryNode() || Visited.test(Dep->NodeNum))
288 continue;
289 Visited.set(Dep->NodeNum);
290
291 if (Dep->isInstr() && Dep->getInstr()->mayLoad()) {
292 Loads.insert(Dep);
293 if (StopAtLoads)
294 continue;
295 }
296 Stack.push_back(Dep);
297 }
298 }
299}
300
301/// Checks whether fusing SU \p I with SU \p J would force the loads preceding
302/// \p J to complete before loads depending on \p I.
303///
304/// \p ILoadSuccs should hold all first load successors of \p I (via
305/// collectLoads with StopAtLoads=true). For set bits in \p LoadPredsComputed,
306/// the corresponding set in \p LoadPredsCache should hold all transitive load
307/// dependencies (via collectLoads with StopAtLoads=false). The \p Scratch
308/// bitvector should allocate enough bits for the number of SUnits.
309static bool loadsMayOverlap(
310 [[maybe_unused]] SUnit &I, const SmallPtrSet<SUnit *, 8> &ILoadSuccs,
311 SUnit &J, BitVector &LoadPredsComputed,
312 SmallVector<SmallPtrSet<SUnit *, 8>> &LoadPredsCache, BitVector &Scratch) {
313
314 if (ILoadSuccs.empty())
315 return false;
316
317 SmallPtrSet<SUnit *, 8> &JLoadPreds = LoadPredsCache[J.NodeNum];
318 if (!LoadPredsComputed.test(J.NodeNum)) {
319 collectLoads(JLoadPreds, Scratch, J, /*Forward=*/false,
320 /*StopAtLoads=*/true);
321 LoadPredsComputed.set(J.NodeNum);
322 }
323 if (JLoadPreds.empty())
324 return false;
325
326 for (SUnit *ILoad : ILoadSuccs) {
327 SmallPtrSet<SUnit *, 8> &ILoadDeps = LoadPredsCache[ILoad->NodeNum];
328 if (!LoadPredsComputed.test(ILoad->NodeNum)) {
329 collectLoads(ILoadDeps, Scratch, *ILoad, /*Forward=*/false,
330 /*StopAtLoads=*/false);
331 LoadPredsComputed.set(ILoad->NodeNum);
332 }
333
334 for (SUnit *JLoad : JLoadPreds) {
335 if (ILoad == JLoad) {
337 dbgs() << "Will not pair SU(" << I.NodeNum << ") with SU("
338 << J.NodeNum << ")\n"
339 << " Fusion would introduce a cyclic dependency with SU("
340 << ILoad->NodeNum << ")\n");
341 return true;
342 }
343
344 if (!ILoadDeps.contains(JLoad)) {
345 LLVM_DEBUG(dbgs() << "Will not pair SU(" << I.NodeNum << ") with SU("
346 << J.NodeNum << ")\n"
347 << " Fusion may force SU(" << JLoad->NodeNum
348 << ") to complete its load before dispatching SU("
349 << ILoad->NodeNum << ")\n");
350 return true;
351 }
352 }
353 }
354 return false;
355}
356
357namespace {
358/// Adapts design from MacroFusion
359/// Puts valid candidate instructions back-to-back so they can easily
360/// be turned into VOPD instructions
361/// Greedily pairs instruction candidates. O(n^2) algorithm.
362struct VOPDPairingMutation : ScheduleDAGMutation {
363 MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
364
365 VOPDPairingMutation(
366 MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
368
369 void apply(ScheduleDAGInstrs *DAG) override {
370 const TargetInstrInfo &TII = *DAG->TII;
371 const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
372 if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
373 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
374 return;
375 }
376
377 BitVector VOPDCapable(DAG->SUnits.size());
378 unsigned IIdx = 0;
379 // Pre-compute whether each individual instruction can be VOPD
380 for (auto ISUI = DAG->SUnits.begin(), E = DAG->SUnits.end(); ISUI != E;
381 ++ISUI, ++IIdx) {
382 const MachineInstr *IMI = ISUI->getInstr();
383 if (shouldScheduleAdjacent(TII, ST, nullptr, *IMI) &&
384 hasLessThanNumFused(*ISUI, 2))
385 VOPDCapable[IIdx] = true;
386 }
387
388 IIdx = 0;
389 SmallPtrSet<SUnit *, 8> ILoadSuccs;
390
391 // Cache collected load predecessors.
392 // For VOPDCapable nodes, this caches collectLoads with StopAtLoads=true
393 // For loads, this caches collectLoads with StopAtLoads=false
394 BitVector LoadPredsComputed(DAG->SUnits.size());
395 SmallVector<SmallPtrSet<SUnit *, 8>> LoadPredsCache(DAG->SUnits.size());
396
397 BitVector Scratch(DAG->SUnits.size());
398 for (auto ISUI = DAG->SUnits.begin(), E = DAG->SUnits.end(); ISUI != E;
399 ++ISUI, ++IIdx) {
400 if (!VOPDCapable[IIdx])
401 continue;
402 const MachineInstr *IMI = ISUI->getInstr();
403
404 ILoadSuccs.clear();
405 collectLoads(ILoadSuccs, Scratch, *ISUI, /*Forward=*/true,
406 /*StopAtLoads=*/true);
407
408 unsigned JIdx = IIdx + 1;
409 for (auto JSUI = ISUI + 1; JSUI != E; ++JSUI, ++JIdx) {
410 if (!VOPDCapable[JIdx] || JSUI->isBoundaryNode())
411 continue;
412 const MachineInstr *JMI = JSUI->getInstr();
413 if (!hasLessThanNumFused(*JSUI, 2) ||
414 !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
415 continue;
416
417 if (loadsMayOverlap(*ISUI, ILoadSuccs, *JSUI, LoadPredsComputed,
418 LoadPredsCache, Scratch))
419 continue;
420
421 if (fuseInstructionPair(*DAG, *ISUI, *JSUI)) {
422 // Clear to prevent future checks/fusing
423 VOPDCapable[JIdx] = false;
424 break;
425 }
426 }
427 }
428 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
429 }
430};
431} // namespace
432
433std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
434 return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
435}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
static void collectLoads(SmallPtrSet< SUnit *, 8 > &Loads, BitVector &Visited, SUnit &Head, bool Forward, bool StopAtLoads)
Collect all load (dependents if Forward else dependencies) that connect to the Head SU.
static std::optional< VOPDMatchInfo > tryMatchVOPDPairVariant(const SIInstrInfo &TII, unsigned EncodingFamily, MachineInstr &FirstMI, MachineInstr &SecondMI, bool IsVOPD3)
Core pair-eligibility check for a single VOPD encoding variant (VOPD or VOPD3).
static bool loadsMayOverlap(SUnit &I, const SmallPtrSet< SUnit *, 8 > &ILoadSuccs, SUnit &J, BitVector &LoadPredsComputed, SmallVector< SmallPtrSet< SUnit *, 8 > > &LoadPredsCache, BitVector &Scratch)
Checks whether fusing SU I with SU J would force the loads preceding J to complete before loads depen...
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
Interface definition for SIInstrInfo.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
bool test(unsigned Idx) const
Returns true if bit Idx is set.
Definition BitVector.h:482
BitVector & reset()
Reset all bits in the bitvector.
Definition BitVector.h:409
BitVector & set()
Set all bits in the bitvector.
Definition BitVector.h:366
MachineInstrBundleIterator< const MachineInstr > const_iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Scheduling dependency.
Definition ScheduleDAG.h:51
@ Data
Regular data dependence (aka true-dependence).
Definition ScheduleDAG.h:55
const GCNSubtarget & getSubtarget() const
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
size_type size() const
Definition SmallSet.h:171
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This is an optimization pass for GlobalISel generic memory operations.
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
LLVM_ABI bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI, bool IsVOPD3, bool AllowSameVGPR)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
DWARFExpression::Operation Op
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Definition MacroFusion.h:33
std::optional< VOPDMatchInfo > tryMatchVOPDPair(const SIInstrInfo &TII, MachineInstr &FirstMI, MachineInstr &SecondMI)
Check whether FirstMI and SecondMI can be combined into a VOPD instruction.
LLVM_ABI bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.
Describes a matched VOPD pair: which instruction is the X component and which is the Y component,...