LLVM 23.0.0git
SIPeepholeSDWA.cpp
Go to the documentation of this file.
1//===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass tries to apply several peephole SDWA patterns.
10///
11/// E.g. original:
12/// V_LSHRREV_B32_e32 %0, 16, %1
13/// V_ADD_CO_U32_e32 %2, %0, %3
14/// V_LSHLREV_B32_e32 %4, 16, %2
15///
16/// Replace:
17/// V_ADD_CO_U32_sdwa %4, %1, %3
18/// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
19///
20//===----------------------------------------------------------------------===//
21
22#include "SIPeepholeSDWA.h"
23#include "AMDGPU.h"
24#include "GCNSubtarget.h"
26#include "llvm/ADT/MapVector.h"
27#include "llvm/ADT/Statistic.h"
29#include <optional>
30
31using namespace llvm;
32
33#define DEBUG_TYPE "si-peephole-sdwa"
34
35STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
36STATISTIC(NumSDWAInstructionsPeepholed,
37 "Number of instruction converted to SDWA.");
38
39namespace {
40
41bool isConvertibleToSDWA(MachineInstr &MI, const GCNSubtarget &ST,
42 const SIInstrInfo *TII);
43class SDWAOperand;
44class SDWADstOperand;
45
46using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>;
48
49class SIPeepholeSDWA {
50private:
52 const SIRegisterInfo *TRI;
53 const SIInstrInfo *TII;
54
56 SDWAOperandsMap PotentialMatches;
57 SmallVector<MachineInstr *, 8> ConvertedInstructions;
58
59 std::optional<int64_t> foldToImm(const MachineOperand &Op) const;
60
61 void matchSDWAOperands(MachineBasicBlock &MBB);
62 std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
63 void pseudoOpConvertToVOP2(MachineInstr &MI,
64 const GCNSubtarget &ST) const;
65 void convertVcndmaskToVOP2(MachineInstr &MI, const GCNSubtarget &ST) const;
66 MachineInstr *createSDWAVersion(MachineInstr &MI);
67 bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
68 void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
69
70public:
71 bool run(MachineFunction &MF);
72};
73
74class SIPeepholeSDWALegacy : public MachineFunctionPass {
75public:
76 static char ID;
77
78 SIPeepholeSDWALegacy() : MachineFunctionPass(ID) {}
79
80 StringRef getPassName() const override { return "SI Peephole SDWA"; }
81
82 bool runOnMachineFunction(MachineFunction &MF) override;
83
84 void getAnalysisUsage(AnalysisUsage &AU) const override {
85 AU.setPreservesCFG();
87 }
88};
89
90using namespace AMDGPU::SDWA;
91
92class SDWAOperand {
93private:
94 MachineOperand *Target; // Operand that would be used in converted instruction
95 MachineOperand *Replaced; // Operand that would be replace by Target
96
97 /// Returns true iff the SDWA selection of this SDWAOperand can be combined
98 /// with the SDWA selections of its uses in \p MI.
99 virtual bool canCombineSelections(const MachineInstr &MI,
100 const SIInstrInfo *TII) = 0;
101
102public:
103 SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
104 : Target(TargetOp), Replaced(ReplacedOp) {
105 assert(Target->isReg());
106 assert(Replaced->isReg());
107 }
108
109 virtual ~SDWAOperand() = default;
110
111 virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII,
112 const GCNSubtarget &ST,
113 SDWAOperandsMap *PotentialMatches = nullptr) = 0;
114 virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;
115
116 MachineOperand *getTargetOperand() const { return Target; }
117 MachineOperand *getReplacedOperand() const { return Replaced; }
118 MachineInstr *getParentInst() const { return Target->getParent(); }
119
120 MachineRegisterInfo *getMRI() const {
121 return &getParentInst()->getMF()->getRegInfo();
122 }
123
124#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
125 virtual void print(raw_ostream& OS) const = 0;
126 void dump() const { print(dbgs()); }
127#endif
128};
129
130class SDWASrcOperand : public SDWAOperand {
131private:
132 SdwaSel SrcSel;
133 bool Abs;
134 bool Neg;
135 bool Sext;
136
137public:
138 SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
139 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
140 bool Sext_ = false)
141 : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
142 Neg(Neg_), Sext(Sext_) {}
143
144 MachineInstr *potentialToConvert(const SIInstrInfo *TII,
145 const GCNSubtarget &ST,
146 SDWAOperandsMap *PotentialMatches = nullptr) override;
147 bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
148 bool canCombineSelections(const MachineInstr &MI,
149 const SIInstrInfo *TII) override;
150
151 SdwaSel getSrcSel() const { return SrcSel; }
152 bool getAbs() const { return Abs; }
153 bool getNeg() const { return Neg; }
154 bool getSext() const { return Sext; }
155
156 uint64_t getSrcMods(const SIInstrInfo *TII,
157 const MachineOperand *SrcOp) const;
158
159#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
160 void print(raw_ostream& OS) const override;
161#endif
162};
163
164class SDWADstOperand : public SDWAOperand {
165private:
166 SdwaSel DstSel;
167 DstUnused DstUn;
168
169public:
170 SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
171 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
172 : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
173
174 MachineInstr *potentialToConvert(const SIInstrInfo *TII,
175 const GCNSubtarget &ST,
176 SDWAOperandsMap *PotentialMatches = nullptr) override;
177 bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
178 bool canCombineSelections(const MachineInstr &MI,
179 const SIInstrInfo *TII) override;
180
181 SdwaSel getDstSel() const { return DstSel; }
182 DstUnused getDstUnused() const { return DstUn; }
183
184#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
185 void print(raw_ostream& OS) const override;
186#endif
187};
188
189class SDWADstPreserveOperand : public SDWADstOperand {
190private:
191 MachineOperand *Preserve;
192
193public:
194 SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
195 MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD)
196 : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),
197 Preserve(PreserveOp) {}
198
199 bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
200 bool canCombineSelections(const MachineInstr &MI,
201 const SIInstrInfo *TII) override;
202
203 MachineOperand *getPreservedOperand() const { return Preserve; }
204
205#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
206 void print(raw_ostream& OS) const override;
207#endif
208};
209
210} // end anonymous namespace
211
212INITIALIZE_PASS(SIPeepholeSDWALegacy, DEBUG_TYPE, "SI Peephole SDWA", false,
213 false)
214
215char SIPeepholeSDWALegacy::ID = 0;
216
217char &llvm::SIPeepholeSDWALegacyID = SIPeepholeSDWALegacy::ID;
218
220 return new SIPeepholeSDWALegacy();
221}
222
223#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
225 switch(Sel) {
226 case BYTE_0: OS << "BYTE_0"; break;
227 case BYTE_1: OS << "BYTE_1"; break;
228 case BYTE_2: OS << "BYTE_2"; break;
229 case BYTE_3: OS << "BYTE_3"; break;
230 case WORD_0: OS << "WORD_0"; break;
231 case WORD_1: OS << "WORD_1"; break;
232 case DWORD: OS << "DWORD"; break;
233 }
234 return OS;
235}
236
238 switch(Un) {
239 case UNUSED_PAD: OS << "UNUSED_PAD"; break;
240 case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
241 case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
242 }
243 return OS;
244}
245
247void SDWASrcOperand::print(raw_ostream& OS) const {
248 OS << "SDWA src: " << *getTargetOperand()
249 << " src_sel:" << getSrcSel()
250 << " abs:" << getAbs() << " neg:" << getNeg()
251 << " sext:" << getSext() << '\n';
252}
253
255void SDWADstOperand::print(raw_ostream& OS) const {
256 OS << "SDWA dst: " << *getTargetOperand()
257 << " dst_sel:" << getDstSel()
258 << " dst_unused:" << getDstUnused() << '\n';
259}
260
262void SDWADstPreserveOperand::print(raw_ostream& OS) const {
263 OS << "SDWA preserve dst: " << *getTargetOperand()
264 << " dst_sel:" << getDstSel()
265 << " preserve:" << *getPreservedOperand() << '\n';
266}
267
268#endif
269
270static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
271 assert(To.isReg() && From.isReg());
272 To.setReg(From.getReg());
273 To.setSubReg(From.getSubReg());
274 To.setIsUndef(From.isUndef());
275 if (To.isUse()) {
276 To.setIsKill(From.isKill());
277 } else {
278 To.setIsDead(From.isDead());
279 }
280}
281
282static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
283 return LHS.isReg() &&
284 RHS.isReg() &&
285 LHS.getReg() == RHS.getReg() &&
286 LHS.getSubReg() == RHS.getSubReg();
287}
288
290 const MachineRegisterInfo *MRI) {
291 if (!Reg->isReg() || !Reg->isDef())
292 return nullptr;
293
294 return MRI->getOneNonDBGUse(Reg->getReg());
295}
296
298 const MachineRegisterInfo *MRI) {
299 if (!Reg->isReg())
300 return nullptr;
301
302 return MRI->getOneDef(Reg->getReg());
303}
304
305/// Combine an SDWA instruction's existing SDWA selection \p Sel with
306/// the SDWA selection \p OperandSel of its operand. If the selections
307/// are compatible, return the combined selection, otherwise return a
308/// nullopt.
309/// For example, if we have Sel = BYTE_0 Sel and OperandSel = WORD_1:
310/// BYTE_0 Sel (WORD_1 Sel (%X)) -> BYTE_2 Sel (%X)
311static std::optional<SdwaSel> combineSdwaSel(SdwaSel Sel, SdwaSel OperandSel) {
312 if (Sel == SdwaSel::DWORD)
313 return OperandSel;
314
315 if (Sel == OperandSel || OperandSel == SdwaSel::DWORD)
316 return Sel;
317
318 if (Sel == SdwaSel::WORD_1 || Sel == SdwaSel::BYTE_2 ||
319 Sel == SdwaSel::BYTE_3)
320 return {};
321
322 if (OperandSel == SdwaSel::WORD_0)
323 return Sel;
324
325 if (OperandSel == SdwaSel::WORD_1) {
326 if (Sel == SdwaSel::BYTE_0)
327 return SdwaSel::BYTE_2;
328 if (Sel == SdwaSel::BYTE_1)
329 return SdwaSel::BYTE_3;
330 if (Sel == SdwaSel::WORD_0)
331 return SdwaSel::WORD_1;
332 }
333
334 return {};
335}
336
337uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
338 const MachineOperand *SrcOp) const {
339 uint64_t Mods = 0;
340 const auto *MI = SrcOp->getParent();
341 if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
342 if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
343 Mods = Mod->getImm();
344 }
345 } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
346 if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
347 Mods = Mod->getImm();
348 }
349 }
350 if (Abs || Neg) {
351 assert(!Sext &&
352 "Float and integer src modifiers can't be set simultaneously");
353 Mods |= Abs ? SISrcMods::ABS : 0u;
354 Mods ^= Neg ? SISrcMods::NEG : 0u;
355 } else if (Sext) {
356 Mods |= SISrcMods::SEXT;
357 }
358
359 return Mods;
360}
361
362MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII,
363 const GCNSubtarget &ST,
364 SDWAOperandsMap *PotentialMatches) {
365 if (PotentialMatches != nullptr) {
366 // Fill out the map for all uses if all can be converted
367 MachineOperand *Reg = getReplacedOperand();
368 if (!Reg->isReg() || !Reg->isDef())
369 return nullptr;
370
371 for (MachineInstr &UseMI : getMRI()->use_nodbg_instructions(Reg->getReg()))
372 // Check that all instructions that use Reg can be converted
373 if (!isConvertibleToSDWA(UseMI, ST, TII) ||
374 !canCombineSelections(UseMI, TII))
375 return nullptr;
376
377 // Now that it's guaranteed all uses are legal, iterate over the uses again
378 // to add them for later conversion.
379 for (MachineOperand &UseMO : getMRI()->use_nodbg_operands(Reg->getReg())) {
380 // Should not get a subregister here
381 assert(isSameReg(UseMO, *Reg));
382
383 SDWAOperandsMap &potentialMatchesMap = *PotentialMatches;
384 MachineInstr *UseMI = UseMO.getParent();
385 potentialMatchesMap[UseMI].push_back(this);
386 }
387 return nullptr;
388 }
389
390 // For SDWA src operand potential instruction is one that use register
391 // defined by parent instruction
392 MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI());
393 if (!PotentialMO)
394 return nullptr;
395
396 MachineInstr *Parent = PotentialMO->getParent();
397
398 return canCombineSelections(*Parent, TII) ? Parent : nullptr;
399}
400
401bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
402 assert((!Sext || !TII->getSubtarget().zeroesHigh16BitsOfDest(
403 getParentInst()->getOpcode())) &&
404 "Cannot use sign-extension with instruction that zeroes high bits");
405 switch (MI.getOpcode()) {
406 case AMDGPU::V_CVT_F32_FP8_sdwa:
407 case AMDGPU::V_CVT_F32_BF8_sdwa:
408 case AMDGPU::V_CVT_PK_F32_FP8_sdwa:
409 case AMDGPU::V_CVT_PK_F32_BF8_sdwa:
410 // Does not support input modifiers: noabs, noneg, nosext.
411 return false;
412 case AMDGPU::V_CNDMASK_B32_sdwa:
413 // SISrcMods uses the same bitmask for SEXT and NEG modifiers and
414 // hence the compiler can only support one type of modifier for
415 // each SDWA instruction. For V_CNDMASK_B32_sdwa, this is NEG
416 // since its operands get printed using
417 // AMDGPUInstPrinter::printOperandAndFPInputMods which produces
418 // the output intended for NEG if SEXT is set.
419 //
420 // The ISA does actually support both modifiers on most SDWA
421 // instructions.
422 //
423 // FIXME Accept SEXT here after fixing this issue.
424 if (Sext)
425 return false;
426 break;
427 }
428
429 // Find operand in instruction that matches source operand and replace it with
430 // target operand. Set corresponding src_sel
431 bool IsPreserveSrc = false;
432 MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
433 MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
434 MachineOperand *SrcMods =
435 TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
436 assert(Src && (Src->isReg() || Src->isImm()));
437 if (!isSameReg(*Src, *getReplacedOperand())) {
438 // If this is not src0 then it could be src1
439 Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
440 SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
441 SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
442
443 if (!Src ||
444 !isSameReg(*Src, *getReplacedOperand())) {
445 // It's possible this Src is a tied operand for
446 // UNUSED_PRESERVE, in which case we can either
447 // abandon the peephole attempt, or if legal we can
448 // copy the target operand into the tied slot
449 // if the preserve operation will effectively cause the same
450 // result by overwriting the rest of the dst.
451 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
452 MachineOperand *DstUnused =
453 TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
454
455 if (Dst &&
456 DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
457 // This will work if the tied src is accessing WORD_0, and the dst is
458 // writing WORD_1. Modifiers don't matter because all the bits that
459 // would be impacted are being overwritten by the dst.
460 // Any other case will not work.
461 SdwaSel DstSel = static_cast<SdwaSel>(
462 TII->getNamedImmOperand(MI, AMDGPU::OpName::dst_sel));
463 if (DstSel == AMDGPU::SDWA::SdwaSel::WORD_1 &&
464 getSrcSel() == AMDGPU::SDWA::SdwaSel::WORD_0) {
465 IsPreserveSrc = true;
466 auto DstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
467 AMDGPU::OpName::vdst);
468 auto TiedIdx = MI.findTiedOperandIdx(DstIdx);
469 Src = &MI.getOperand(TiedIdx);
470 SrcSel = nullptr;
471 SrcMods = nullptr;
472 } else {
473 // Not legal to convert this src
474 return false;
475 }
476 }
477 }
478 assert(Src && Src->isReg());
479
480 if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
481 MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
482 MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
483 MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
484 !isSameReg(*Src, *getReplacedOperand())) {
485 // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
486 // src2. This is not allowed.
487 return false;
488 }
489
490 assert(isSameReg(*Src, *getReplacedOperand()) &&
491 (IsPreserveSrc || (SrcSel && SrcMods)));
492 }
493 copyRegOperand(*Src, *getTargetOperand());
494 if (!IsPreserveSrc) {
495 SdwaSel ExistingSel = static_cast<SdwaSel>(SrcSel->getImm());
496 SrcSel->setImm(*combineSdwaSel(ExistingSel, getSrcSel()));
497 SrcMods->setImm(getSrcMods(TII, Src));
498 }
499 getTargetOperand()->setIsKill(false);
500 return true;
501}
502
503/// Verify that the SDWA selection operand \p SrcSelOpName of the SDWA
504/// instruction \p MI can be combined with the selection \p OpSel.
505static bool canCombineOpSel(const MachineInstr &MI, const SIInstrInfo *TII,
506 AMDGPU::OpName SrcSelOpName, SdwaSel OpSel) {
507 assert(TII->isSDWA(MI.getOpcode()));
508
509 const MachineOperand *SrcSelOp = TII->getNamedOperand(MI, SrcSelOpName);
510 SdwaSel SrcSel = static_cast<SdwaSel>(SrcSelOp->getImm());
511
512 return combineSdwaSel(SrcSel, OpSel).has_value();
513}
514
515/// Verify that \p Op is the same register as the operand of the SDWA
516/// instruction \p MI named by \p SrcOpName and that the SDWA
517/// selection \p SrcSelOpName can be combined with the \p OpSel.
518static bool canCombineOpSel(const MachineInstr &MI, const SIInstrInfo *TII,
519 AMDGPU::OpName SrcOpName,
520 AMDGPU::OpName SrcSelOpName, MachineOperand *Op,
521 SdwaSel OpSel) {
522 assert(TII->isSDWA(MI.getOpcode()));
523
524 const MachineOperand *Src = TII->getNamedOperand(MI, SrcOpName);
525 if (!Src || !isSameReg(*Src, *Op))
526 return true;
527
528 return canCombineOpSel(MI, TII, SrcSelOpName, OpSel);
529}
530
531bool SDWASrcOperand::canCombineSelections(const MachineInstr &MI,
532 const SIInstrInfo *TII) {
533 if (!TII->isSDWA(MI.getOpcode()))
534 return true;
535
536 using namespace AMDGPU;
537
538 return canCombineOpSel(MI, TII, OpName::src0, OpName::src0_sel,
539 getReplacedOperand(), getSrcSel()) &&
540 canCombineOpSel(MI, TII, OpName::src1, OpName::src1_sel,
541 getReplacedOperand(), getSrcSel());
542}
543
544MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII,
545 const GCNSubtarget &ST,
546 SDWAOperandsMap *PotentialMatches) {
547 // For SDWA dst operand potential instruction is one that defines register
548 // that this operand uses
549 MachineRegisterInfo *MRI = getMRI();
550 MachineInstr *ParentMI = getParentInst();
551
552 MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI);
553 if (!PotentialMO)
554 return nullptr;
555
556 // Check that ParentMI is the only instruction that uses replaced register
557 for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) {
558 if (&UseInst != ParentMI)
559 return nullptr;
560 }
561
562 MachineInstr *Parent = PotentialMO->getParent();
563 return canCombineSelections(*Parent, TII) ? Parent : nullptr;
564}
565
566bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
567 // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
568
569 if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
570 MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
571 MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
572 MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
573 getDstSel() != AMDGPU::SDWA::DWORD) {
574 // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
575 return false;
576 }
577
578 MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
579 assert(Operand &&
580 Operand->isReg() &&
581 isSameReg(*Operand, *getReplacedOperand()));
582 copyRegOperand(*Operand, *getTargetOperand());
583 MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
584 assert(DstSel);
585
586 SdwaSel ExistingSel = static_cast<SdwaSel>(DstSel->getImm());
587 DstSel->setImm(combineSdwaSel(ExistingSel, getDstSel()).value());
588
589 MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
591 DstUnused->setImm(getDstUnused());
592
593 // Remove original instruction because it would conflict with our new
594 // instruction by register definition
595 getParentInst()->eraseFromParent();
596 return true;
597}
598
599bool SDWADstOperand::canCombineSelections(const MachineInstr &MI,
600 const SIInstrInfo *TII) {
601 if (!TII->isSDWA(MI.getOpcode()))
602 return true;
603
604 return canCombineOpSel(MI, TII, AMDGPU::OpName::dst_sel, getDstSel());
605}
606
607bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
608 const SIInstrInfo *TII) {
609 // MI should be moved right before v_or_b32.
610 // For this we should clear all kill flags on uses of MI src-operands or else
611 // we can encounter problem with use of killed operand.
612 for (MachineOperand &MO : MI.uses()) {
613 if (!MO.isReg())
614 continue;
615 getMRI()->clearKillFlags(MO.getReg());
616 }
617
618 // Move MI before v_or_b32
619 MI.getParent()->remove(&MI);
620 getParentInst()->getParent()->insert(getParentInst(), &MI);
621
622 // Add Implicit use of preserved register
623 MachineInstrBuilder MIB(*MI.getMF(), MI);
624 MIB.addReg(getPreservedOperand()->getReg(),
625 RegState::ImplicitKill,
626 getPreservedOperand()->getSubReg());
627
628 // Tie dst to implicit use
629 MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst),
630 MI.getNumOperands() - 1);
631
632 // Convert MI as any other SDWADstOperand and remove v_or_b32
633 return SDWADstOperand::convertToSDWA(MI, TII);
634}
635
636bool SDWADstPreserveOperand::canCombineSelections(const MachineInstr &MI,
637 const SIInstrInfo *TII) {
638 return SDWADstOperand::canCombineSelections(MI, TII);
639}
640
641std::optional<int64_t>
642SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
643 if (Op.isImm()) {
644 return Op.getImm();
645 }
646
647 // If this is not immediate then it can be copy of immediate value, e.g.:
648 // %1 = S_MOV_B32 255;
649 if (Op.isReg()) {
650 for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
651 if (!isSameReg(Op, Def))
652 continue;
653
654 const MachineInstr *DefInst = Def.getParent();
655 if (!TII->isFoldableCopy(*DefInst))
656 return std::nullopt;
657
658 const MachineOperand &Copied = DefInst->getOperand(1);
659 if (!Copied.isImm())
660 return std::nullopt;
661
662 return Copied.getImm();
663 }
664 }
665
666 return std::nullopt;
667}
668
669std::unique_ptr<SDWAOperand>
670SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
671 unsigned Opcode = MI.getOpcode();
672 switch (Opcode) {
673 case AMDGPU::V_LSHRREV_B32_e32:
674 case AMDGPU::V_ASHRREV_I32_e32:
675 case AMDGPU::V_LSHLREV_B32_e32:
676 case AMDGPU::V_LSHRREV_B32_e64:
677 case AMDGPU::V_ASHRREV_I32_e64:
678 case AMDGPU::V_LSHLREV_B32_e64: {
679 // from: v_lshrrev_b32_e32 v1, 16/24, v0
680 // to SDWA src:v0 src_sel:WORD_1/BYTE_3
681
682 // from: v_ashrrev_i32_e32 v1, 16/24, v0
683 // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1
684
685 // from: v_lshlrev_b32_e32 v1, 16/24, v0
686 // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
687 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
688 auto Imm = foldToImm(*Src0);
689 if (!Imm)
690 break;
691
692 if (*Imm != 16 && *Imm != 24)
693 break;
694
695 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
696 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
697 if (!Src1->isReg() || Src1->getReg().isPhysical() ||
698 Dst->getReg().isPhysical())
699 break;
700
701 if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
702 Opcode == AMDGPU::V_LSHLREV_B32_e64) {
703 return std::make_unique<SDWADstOperand>(
704 Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
705 }
706 return std::make_unique<SDWASrcOperand>(
707 Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
708 Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
709 Opcode != AMDGPU::V_LSHRREV_B32_e64);
710 break;
711 }
712
713 case AMDGPU::V_LSHRREV_B16_e32:
714 case AMDGPU::V_LSHLREV_B16_e32:
715 case AMDGPU::V_LSHRREV_B16_e64:
716 case AMDGPU::V_LSHRREV_B16_opsel_e64:
717 case AMDGPU::V_LSHLREV_B16_opsel_e64:
718 case AMDGPU::V_LSHLREV_B16_e64: {
719 // V_ASHRREV_I16_e32 and V_ASHRREV_I16_e64 are
720 // not included here because they zero-fill the high 16-bits.
721
722 // from: v_lshrrev_b16_e32 v1, 8, v0
723 // to SDWA src:v0 src_sel:BYTE_1
724
725 // from: v_lshlrev_b16_e32 v1, 8, v0
726 // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
727 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
728 auto Imm = foldToImm(*Src0);
729 if (!Imm || *Imm != 8)
730 break;
731
732 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
733 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
734
735 if (!Src1->isReg() || Src1->getReg().isPhysical() ||
736 Dst->getReg().isPhysical())
737 break;
738
739 if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
740 Opcode == AMDGPU::V_LSHLREV_B16_opsel_e64 ||
741 Opcode == AMDGPU::V_LSHLREV_B16_e64)
742 return std::make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
743 return std::make_unique<SDWASrcOperand>(Src1, Dst, BYTE_1, false, false,
744 false);
745 break;
746 }
747
748 case AMDGPU::V_BFE_I32_e64:
749 case AMDGPU::V_BFE_U32_e64: {
750 // e.g.:
751 // from: v_bfe_u32 v1, v0, 8, 8
752 // to SDWA src:v0 src_sel:BYTE_1
753
754 // offset | width | src_sel
755 // ------------------------
756 // 0 | 8 | BYTE_0
757 // 0 | 16 | WORD_0
758 // 0 | 32 | DWORD ?
759 // 8 | 8 | BYTE_1
760 // 16 | 8 | BYTE_2
761 // 16 | 16 | WORD_1
762 // 24 | 8 | BYTE_3
763
764 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
765 auto Offset = foldToImm(*Src1);
766 if (!Offset)
767 break;
768
769 MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
770 auto Width = foldToImm(*Src2);
771 if (!Width)
772 break;
773
774 SdwaSel SrcSel = DWORD;
775
776 if (*Offset == 0 && *Width == 8)
777 SrcSel = BYTE_0;
778 else if (*Offset == 0 && *Width == 16)
779 SrcSel = WORD_0;
780 else if (*Offset == 0 && *Width == 32)
781 SrcSel = DWORD;
782 else if (*Offset == 8 && *Width == 8)
783 SrcSel = BYTE_1;
784 else if (*Offset == 16 && *Width == 8)
785 SrcSel = BYTE_2;
786 else if (*Offset == 16 && *Width == 16)
787 SrcSel = WORD_1;
788 else if (*Offset == 24 && *Width == 8)
789 SrcSel = BYTE_3;
790 else
791 break;
792
793 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
794 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
795
796 if (!Src0->isReg() || Src0->getReg().isPhysical() ||
797 Dst->getReg().isPhysical())
798 break;
799
800 return std::make_unique<SDWASrcOperand>(
801 Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32_e64);
802 }
803
804 case AMDGPU::V_AND_B32_e32:
805 case AMDGPU::V_AND_B32_e64: {
806 // e.g.:
807 // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
808 // to SDWA src:v0 src_sel:WORD_0/BYTE_0
809
810 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
811 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
812 auto *ValSrc = Src1;
813 auto Imm = foldToImm(*Src0);
814
815 if (!Imm) {
816 Imm = foldToImm(*Src1);
817 ValSrc = Src0;
818 }
819
820 if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
821 break;
822
823 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
824
825 if (!ValSrc->isReg() || ValSrc->getReg().isPhysical() ||
826 Dst->getReg().isPhysical())
827 break;
828
829 return std::make_unique<SDWASrcOperand>(
830 ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
831 }
832
833 case AMDGPU::V_OR_B32_e32:
834 case AMDGPU::V_OR_B32_e64: {
835 // Patterns for dst_unused:UNUSED_PRESERVE.
836 // e.g., from:
837 // v_add_f16_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD
838 // src1_sel:WORD_1 src2_sel:WORD1
839 // v_add_f16_e32 v3, v1, v2
840 // v_or_b32_e32 v4, v0, v3
841 // to SDWA preserve dst:v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE preserve:v3
842
843 // Check if one of operands of v_or_b32 is SDWA instruction
844 using CheckRetType =
845 std::optional<std::pair<MachineOperand *, MachineOperand *>>;
846 auto CheckOROperandsForSDWA =
847 [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType {
848 if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg())
849 return CheckRetType(std::nullopt);
850
851 MachineOperand *Op1Def = findSingleRegDef(Op1, MRI);
852 if (!Op1Def)
853 return CheckRetType(std::nullopt);
854
855 MachineInstr *Op1Inst = Op1Def->getParent();
856 if (!TII->isSDWA(*Op1Inst))
857 return CheckRetType(std::nullopt);
858
859 MachineOperand *Op2Def = findSingleRegDef(Op2, MRI);
860 if (!Op2Def)
861 return CheckRetType(std::nullopt);
862
863 return CheckRetType(std::pair(Op1Def, Op2Def));
864 };
865
866 MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
867 MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
868 assert(OrSDWA && OrOther);
869 auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
870 if (!Res) {
871 OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
872 OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
873 assert(OrSDWA && OrOther);
874 Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
875 if (!Res)
876 break;
877 }
878
879 MachineOperand *OrSDWADef = Res->first;
880 MachineOperand *OrOtherDef = Res->second;
881 assert(OrSDWADef && OrOtherDef);
882
883 MachineInstr *SDWAInst = OrSDWADef->getParent();
884 MachineInstr *OtherInst = OrOtherDef->getParent();
885
886 // Check that OtherInstr is actually bitwise compatible with SDWAInst = their
887 // destination patterns don't overlap. Compatible instruction can be either
888 // regular instruction with compatible bitness or SDWA instruction with
889 // correct dst_sel
890 // SDWAInst | OtherInst bitness / OtherInst dst_sel
891 // -----------------------------------------------------
892 // DWORD | no / no
893 // WORD_0 | no / BYTE_2/3, WORD_1
894 // WORD_1 | 8/16-bit instructions / BYTE_0/1, WORD_0
895 // BYTE_0 | no / BYTE_1/2/3, WORD_1
896 // BYTE_1 | 8-bit / BYTE_0/2/3, WORD_1
897 // BYTE_2 | 8/16-bit / BYTE_0/1/3. WORD_0
898 // BYTE_3 | 8/16/24-bit / BYTE_0/1/2, WORD_0
899 // E.g. if SDWAInst is v_add_f16_sdwa dst_sel:WORD_1 then v_add_f16 is OK
900 // but v_add_f32 is not.
901
902 // TODO: add support for non-SDWA instructions as OtherInst.
903 // For now this only works with SDWA instructions. For regular instructions
904 // there is no way to determine if the instruction writes only 8/16/24-bit
905 // out of full register size and all registers are at min 32-bit wide.
906 if (!TII->isSDWA(*OtherInst))
907 break;
908
909 SdwaSel DstSel = static_cast<SdwaSel>(
910 TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));
911 SdwaSel OtherDstSel = static_cast<SdwaSel>(
912 TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));
913
914 bool DstSelAgree = false;
915 switch (DstSel) {
916 case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) ||
917 (OtherDstSel == BYTE_3) ||
918 (OtherDstSel == WORD_1));
919 break;
920 case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
921 (OtherDstSel == BYTE_1) ||
922 (OtherDstSel == WORD_0));
923 break;
924 case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) ||
925 (OtherDstSel == BYTE_2) ||
926 (OtherDstSel == BYTE_3) ||
927 (OtherDstSel == WORD_1));
928 break;
929 case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
930 (OtherDstSel == BYTE_2) ||
931 (OtherDstSel == BYTE_3) ||
932 (OtherDstSel == WORD_1));
933 break;
934 case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) ||
935 (OtherDstSel == BYTE_1) ||
936 (OtherDstSel == BYTE_3) ||
937 (OtherDstSel == WORD_0));
938 break;
939 case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) ||
940 (OtherDstSel == BYTE_1) ||
941 (OtherDstSel == BYTE_2) ||
942 (OtherDstSel == WORD_0));
943 break;
944 default: DstSelAgree = false;
945 }
946
947 if (!DstSelAgree)
948 break;
949
950 // Also OtherInst dst_unused should be UNUSED_PAD
951 DstUnused OtherDstUnused = static_cast<DstUnused>(
952 TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused));
953 if (OtherDstUnused != DstUnused::UNUSED_PAD)
954 break;
955
956 // Create DstPreserveOperand
957 MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
958 assert(OrDst && OrDst->isReg());
959
960 return std::make_unique<SDWADstPreserveOperand>(
961 OrDst, OrSDWADef, OrOtherDef, DstSel);
962
963 }
964 }
965
966 return std::unique_ptr<SDWAOperand>(nullptr);
967}
968
969#if !defined(NDEBUG)
970static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) {
971 Operand.print(OS);
972 return OS;
973}
974#endif
975
976void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
977 for (MachineInstr &MI : MBB) {
978 if (auto Operand = matchSDWAOperand(MI)) {
979 LLVM_DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n');
980 SDWAOperands[&MI] = std::move(Operand);
981 ++NumSDWAPatternsFound;
982 }
983 }
984}
985
986// Convert the V_ADD_CO_U32_e64 into V_ADD_CO_U32_e32. This allows
987// isConvertibleToSDWA to perform its transformation on V_ADD_CO_U32_e32 into
988// V_ADD_CO_U32_sdwa.
989//
990// We are transforming from a VOP3 into a VOP2 form of the instruction.
991// %19:vgpr_32 = V_AND_B32_e32 255,
992// killed %16:vgpr_32, implicit $exec
993// %47:vgpr_32, %49:sreg_64_xexec = V_ADD_CO_U32_e64
994// %26.sub0:vreg_64, %19:vgpr_32, implicit $exec
995// %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64
996// %26.sub1:vreg_64, %54:vgpr_32, killed %49:sreg_64_xexec, implicit $exec
997//
998// becomes
999// %47:vgpr_32 = V_ADD_CO_U32_sdwa
1000// 0, %26.sub0:vreg_64, 0, killed %16:vgpr_32, 0, 6, 0, 6, 0,
1001// implicit-def $vcc, implicit $exec
1002// %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64
1003// %26.sub1:vreg_64, %54:vgpr_32, killed $vcc, implicit $exec
1004void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
1005 const GCNSubtarget &ST) const {
1006 int Opc = MI.getOpcode();
1007 assert((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64) &&
1008 "Currently only handles V_ADD_CO_U32_e64 or V_SUB_CO_U32_e64");
1009
1010 // Can the candidate MI be shrunk?
1011 if (!TII->canShrink(MI, *MRI))
1012 return;
1014 // Find the related ADD instruction.
1015 const MachineOperand *Sdst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
1016 if (!Sdst)
1017 return;
1018 MachineOperand *NextOp = findSingleRegUse(Sdst, MRI);
1019 if (!NextOp)
1020 return;
1021 MachineInstr &MISucc = *NextOp->getParent();
1022
1023 // Make sure the carry in/out are subsequently unused.
1024 MachineOperand *CarryIn = TII->getNamedOperand(MISucc, AMDGPU::OpName::src2);
1025 if (!CarryIn)
1026 return;
1027 MachineOperand *CarryOut = TII->getNamedOperand(MISucc, AMDGPU::OpName::sdst);
1028 if (!CarryOut)
1029 return;
1030 if (!MRI->hasOneNonDBGUse(CarryIn->getReg()) ||
1031 !MRI->use_nodbg_empty(CarryOut->getReg()))
1032 return;
1033 // Make sure VCC or its subregs are dead before MI.
1034 MachineBasicBlock &MBB = *MI.getParent();
1036 MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
1037 if (Liveness != MachineBasicBlock::LQR_Dead)
1038 return;
1039 // Check if VCC is referenced in range of (MI,MISucc].
1040 for (auto I = std::next(MI.getIterator()), E = MISucc.getIterator();
1041 I != E; ++I) {
1042 if (I->modifiesRegister(AMDGPU::VCC, TRI))
1043 return;
1044 }
1045
1046 // Replace MI with V_{SUB|ADD}_I32_e32
1047 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc))
1048 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
1049 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
1050 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
1051 .setMIFlags(MI.getFlags());
1052
1053 MI.eraseFromParent();
1054
1055 // Since the carry output of MI is now VCC, update its use in MISucc.
1056
1057 MISucc.substituteRegister(CarryIn->getReg(), TRI->getVCC(), 0, *TRI);
1058}
1059
1060/// Try to convert an \p MI in VOP3 which takes an src2 carry-in
1061/// operand into the corresponding VOP2 form which expects the
1062/// argument in VCC. To this end, add an copy from the carry-in to
1063/// VCC. The conversion will only be applied if \p MI can be shrunk
1064/// to VOP2 and if VCC can be proven to be dead before \p MI.
1065void SIPeepholeSDWA::convertVcndmaskToVOP2(MachineInstr &MI,
1066 const GCNSubtarget &ST) const {
1067 assert(MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64);
1068
1069 LLVM_DEBUG(dbgs() << "Attempting VOP2 conversion: " << MI);
1070 if (!TII->canShrink(MI, *MRI)) {
1071 LLVM_DEBUG(dbgs() << "Cannot shrink instruction\n");
1072 return;
1073 }
1074
1075 const MachineOperand &CarryIn =
1076 *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1077 Register CarryReg = CarryIn.getReg();
1078 MachineInstr *CarryDef = MRI->getVRegDef(CarryReg);
1079 if (!CarryDef) {
1080 LLVM_DEBUG(dbgs() << "Missing carry-in operand definition\n");
1081 return;
1082 }
1083
1084 // Make sure VCC or its subregs are dead before MI.
1085 MCRegister Vcc = TRI->getVCC();
1086 MachineBasicBlock &MBB = *MI.getParent();
1089 if (Liveness != MachineBasicBlock::LQR_Dead) {
1090 LLVM_DEBUG(dbgs() << "VCC not known to be dead before instruction\n");
1091 return;
1092 }
1093
1094 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), Vcc).add(CarryIn);
1095
1096 auto Converted = BuildMI(MBB, MI, MI.getDebugLoc(),
1097 TII->get(AMDGPU::getVOPe32(MI.getOpcode())))
1098 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
1099 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
1100 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
1101 .setMIFlags(MI.getFlags());
1102 TII->fixImplicitOperands(*Converted);
1103 LLVM_DEBUG(dbgs() << "Converted to VOP2: " << *Converted);
1104 (void)Converted;
1105 MI.eraseFromParent();
1106}
1107
1108namespace {
1109bool isConvertibleToSDWA(MachineInstr &MI,
1110 const GCNSubtarget &ST,
1111 const SIInstrInfo* TII) {
1112 // Check if this is already an SDWA instruction
1113 unsigned Opc = MI.getOpcode();
1114 if (TII->isSDWA(Opc))
1115 return true;
1116
1117 // Can only be handled after ealier conversion to
1118 // AMDGPU::V_CNDMASK_B32_e32 which is not always possible.
1119 if (Opc == AMDGPU::V_CNDMASK_B32_e64)
1120 return false;
1121
1122 // Check if this instruction has opcode that supports SDWA
1123 if (AMDGPU::getSDWAOp(Opc) == -1)
1125
1126 if (AMDGPU::getSDWAOp(Opc) == -1)
1127 return false;
1128
1129 if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1130 return false;
1131
1132 if (TII->isVOPC(Opc)) {
1133 if (!ST.hasSDWASdst()) {
1134 const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
1135 if (SDst && (SDst->getReg() != AMDGPU::VCC &&
1136 SDst->getReg() != AMDGPU::VCC_LO))
1137 return false;
1138 }
1139
1140 if (!ST.hasSDWAOutModsVOPC() &&
1141 (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
1142 TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
1143 return false;
1144
1145 } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
1146 !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
1147 return false;
1148 }
1149
1150 if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
1151 Opc == AMDGPU::V_FMAC_F32_e32 ||
1152 Opc == AMDGPU::V_MAC_F16_e32 ||
1153 Opc == AMDGPU::V_MAC_F32_e32))
1154 return false;
1155
1156 // Check if target supports this SDWA opcode
1157 if (TII->pseudoToMCOpcode(Opc) == -1)
1158 return false;
1159
1160 if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
1161 if (!Src0->isReg() && !Src0->isImm())
1162 return false;
1163 }
1164
1165 if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) {
1166 if (!Src1->isReg() && !Src1->isImm())
1167 return false;
1168 }
1169
1170 return true;
1171}
1172} // namespace
1173
1174MachineInstr *SIPeepholeSDWA::createSDWAVersion(MachineInstr &MI) {
1175 unsigned Opcode = MI.getOpcode();
1176 assert(!TII->isSDWA(Opcode));
1177
1178 int SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
1179 if (SDWAOpcode == -1)
1180 SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
1181 assert(SDWAOpcode != -1);
1182
1183 const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
1184
1185 // Create SDWA version of instruction MI and initialize its operands
1186 MachineInstrBuilder SDWAInst =
1187 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc)
1188 .setMIFlags(MI.getFlags());
1189
1190 // Copy dst, if it is present in original then should also be present in SDWA
1191 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
1192 if (Dst) {
1193 assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::vdst));
1194 SDWAInst.add(*Dst);
1195 } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
1196 assert(Dst && AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::sdst));
1197 SDWAInst.add(*Dst);
1198 } else {
1199 assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::sdst));
1200 SDWAInst.addReg(TRI->getVCC(), RegState::Define);
1201 }
1202
1203 // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
1204 // src0_modifiers (except for v_nop_sdwa, but it can't get here)
1205 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1206 assert(Src0 && AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0) &&
1207 AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0_modifiers));
1208 if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
1209 SDWAInst.addImm(Mod->getImm());
1210 else
1211 SDWAInst.addImm(0);
1212 SDWAInst.add(*Src0);
1213
1214 // Copy src1 if present, initialize src1_modifiers.
1215 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1216 if (Src1) {
1217 assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1) &&
1218 AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1_modifiers));
1219 if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
1220 SDWAInst.addImm(Mod->getImm());
1221 else
1222 SDWAInst.addImm(0);
1223 SDWAInst.add(*Src1);
1224 }
1225
1226 if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
1227 SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
1228 SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
1229 SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
1230 // v_mac_f16/32 has additional src2 operand tied to vdst
1231 MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1232 assert(Src2);
1233 SDWAInst.add(*Src2);
1234 }
1235
1236 // Copy clamp if present, initialize otherwise
1237 assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::clamp));
1238 MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
1239 if (Clamp) {
1240 SDWAInst.add(*Clamp);
1241 } else {
1242 SDWAInst.addImm(0);
1243 }
1244
1245 // Copy omod if present, initialize otherwise if needed
1246 if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::omod)) {
1247 MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
1248 if (OMod) {
1249 SDWAInst.add(*OMod);
1250 } else {
1251 SDWAInst.addImm(0);
1252 }
1253 }
1254
1255 // Initialize SDWA specific operands
1256 if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::dst_sel))
1257 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
1258
1259 if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::dst_unused))
1260 SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
1261
1262 assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0_sel));
1263 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
1264
1265 if (Src1) {
1266 assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1_sel));
1267 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
1268 }
1269
1270 // Check for a preserved register that needs to be copied.
1271 MachineInstr *Ret = SDWAInst.getInstr();
1272 TII->fixImplicitOperands(*Ret);
1273 return Ret;
1274}
1275
1276bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
1277 const SDWAOperandsVector &SDWAOperands) {
1278 LLVM_DEBUG(dbgs() << "Convert instruction:" << MI);
1279
1280 MachineInstr *SDWAInst;
1281 if (TII->isSDWA(MI.getOpcode())) {
1282 // Clone the instruction to allow revoking changes
1283 // made to MI during the processing of the operands
1284 // if the conversion fails.
1285 SDWAInst = MI.getMF()->CloneMachineInstr(&MI);
1286 MI.getParent()->insert(MI.getIterator(), SDWAInst);
1287 } else {
1288 SDWAInst = createSDWAVersion(MI);
1289 }
1290
1291 // Apply all sdwa operand patterns.
1292 bool Converted = false;
1293 for (auto &Operand : SDWAOperands) {
1294 LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand);
1295 // There should be no intersection between SDWA operands and potential MIs
1296 // e.g.:
1297 // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
1298 // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0
1299 // v_add_u32 v3, v4, v2
1300 //
1301 // In that example it is possible that we would fold 2nd instruction into
1302 // 3rd (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that
1303 // was already destroyed). So if SDWAOperand is also a potential MI then do
1304 // not apply it.
1305 if (PotentialMatches.count(Operand->getParentInst()) == 0)
1306 Converted |= Operand->convertToSDWA(*SDWAInst, TII);
1307 }
1308
1309 if (!Converted) {
1310 SDWAInst->eraseFromParent();
1311 return false;
1312 }
1313
1314 ConvertedInstructions.push_back(SDWAInst);
1315 for (MachineOperand &MO : SDWAInst->uses()) {
1316 if (!MO.isReg())
1317 continue;
1318
1319 MRI->clearKillFlags(MO.getReg());
1320 }
1321 LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n');
1322 ++NumSDWAInstructionsPeepholed;
1323
1324 MI.eraseFromParent();
1325 return true;
1326}
1327
1328// If an instruction was converted to SDWA it should not have immediates or SGPR
1329// operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs.
1330void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
1331 const GCNSubtarget &ST) const {
1332 const MCInstrDesc &Desc = TII->get(MI.getOpcode());
1333 unsigned ConstantBusCount = 0;
1334 for (MachineOperand &Op : MI.explicit_uses()) {
1335 if (Op.isReg()) {
1336 if (TRI->isVGPR(*MRI, Op.getReg()))
1337 continue;
1338
1339 if (ST.hasSDWAScalar() && ConstantBusCount == 0) {
1340 ++ConstantBusCount;
1341 continue;
1342 }
1343 } else if (!Op.isImm())
1344 continue;
1345
1346 unsigned I = Op.getOperandNo();
1347 const TargetRegisterClass *OpRC = TII->getRegClass(Desc, I);
1348 if (!OpRC || !TRI->isVSSuperClass(OpRC))
1349 continue;
1350
1351 Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1352 auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
1353 TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
1354 if (Op.isImm())
1355 Copy.addImm(Op.getImm());
1356 else if (Op.isReg())
1357 Copy.addReg(Op.getReg(), getKillRegState(Op.isKill()), Op.getSubReg());
1358 Op.ChangeToRegister(VGPR, false);
1359 }
1360}
1361
1362bool SIPeepholeSDWALegacy::runOnMachineFunction(MachineFunction &MF) {
1363 if (skipFunction(MF.getFunction()))
1364 return false;
1365
1366 return SIPeepholeSDWA().run(MF);
1367}
1368
1369bool SIPeepholeSDWA::run(MachineFunction &MF) {
1370 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1371
1372 if (!ST.hasSDWA())
1373 return false;
1374
1375 MRI = &MF.getRegInfo();
1376 TRI = ST.getRegisterInfo();
1377 TII = ST.getInstrInfo();
1378
1379 // Find all SDWA operands in MF.
1380 bool Ret = false;
1381 for (MachineBasicBlock &MBB : MF) {
1382 bool Changed = false;
1383 do {
1384 // Preprocess the ADD/SUB pairs so they could be SDWA'ed.
1385 // Look for a possible ADD or SUB that resulted from a previously lowered
1386 // V_{ADD|SUB}_U64_PSEUDO. The function pseudoOpConvertToVOP2
1387 // lowers the pair of instructions into e32 form.
1388 matchSDWAOperands(MBB);
1389 for (const auto &OperandPair : SDWAOperands) {
1390 const auto &Operand = OperandPair.second;
1391 MachineInstr *PotentialMI = Operand->potentialToConvert(TII, ST);
1392 if (!PotentialMI)
1393 continue;
1394
1395 switch (PotentialMI->getOpcode()) {
1396 case AMDGPU::V_ADD_CO_U32_e64:
1397 case AMDGPU::V_SUB_CO_U32_e64:
1398 pseudoOpConvertToVOP2(*PotentialMI, ST);
1399 break;
1400 case AMDGPU::V_CNDMASK_B32_e64:
1401 convertVcndmaskToVOP2(*PotentialMI, ST);
1402 break;
1403 };
1404 }
1405 SDWAOperands.clear();
1406
1407 // Generate potential match list.
1408 matchSDWAOperands(MBB);
1409
1410 for (const auto &OperandPair : SDWAOperands) {
1411 const auto &Operand = OperandPair.second;
1412 MachineInstr *PotentialMI =
1413 Operand->potentialToConvert(TII, ST, &PotentialMatches);
1414
1415 if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST, TII))
1416 PotentialMatches[PotentialMI].push_back(Operand.get());
1417 }
1418
1419 for (auto &PotentialPair : PotentialMatches) {
1420 MachineInstr &PotentialMI = *PotentialPair.first;
1421 convertToSDWA(PotentialMI, PotentialPair.second);
1422 }
1423
1424 PotentialMatches.clear();
1425 SDWAOperands.clear();
1426
1427 Changed = !ConvertedInstructions.empty();
1428
1429 if (Changed)
1430 Ret = true;
1431 while (!ConvertedInstructions.empty())
1432 legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
1433 } while (Changed);
1434 }
1435
1436 return Ret;
1437}
1438
MachineInstrBuilder & UseMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static MachineOperand * findSingleRegDef(const MachineOperand *Reg, const MachineRegisterInfo *MRI)
static void copyRegOperand(MachineOperand &To, const MachineOperand &From)
static MachineOperand * findSingleRegUse(const MachineOperand *Reg, const MachineRegisterInfo *MRI)
static std::optional< SdwaSel > combineSdwaSel(SdwaSel Sel, SdwaSel OperandSel)
Combine an SDWA instruction's existing SDWA selection Sel with the SDWA selection OperandSel of its o...
static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS)
static bool canCombineOpSel(const MachineInstr &MI, const SIInstrInfo *TII, AMDGPU::OpName SrcSelOpName, SdwaSel OpSel)
Verify that the SDWA selection operand SrcSelOpName of the SDWA instruction MI can be combined with t...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Value * LHS
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasOptNone() const
Do not optimize this function (-O0).
Definition Function.h:708
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
@ LQR_Dead
Register is known to be fully dead.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
mop_range uses()
Returns all operands which may be register uses.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI MachineOperand * getOneNonDBGUse(Register RegNo) const
If the register has a single non-Debug use, returns it; otherwise returns nullptr.
MachineOperand * getOneDef(Register Reg) const
Returns the defining operand if there is exactly one operand defining the specified register,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
iterator_range< def_iterator > def_operands(Register Reg) const
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
LLVM_READONLY int32_t getSDWAOp(uint32_t Opcode)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:558
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr RegState getKillRegState(bool B)
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
Op::Description Desc
FunctionPass * createSIPeepholeSDWALegacyPass()
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
char & SIPeepholeSDWALegacyID