LLVM  16.0.0git
SIPeepholeSDWA.cpp
Go to the documentation of this file.
1 //===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass tries to apply several peephole SDWA patterns.
10 ///
11 /// E.g. original:
12 /// V_LSHRREV_B32_e32 %0, 16, %1
13 /// V_ADD_CO_U32_e32 %2, %0, %3
14 /// V_LSHLREV_B32_e32 %4, 16, %2
15 ///
16 /// Replace:
17 /// V_ADD_CO_U32_sdwa %4, %1, %3
18 /// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
19 ///
20 //===----------------------------------------------------------------------===//
21 
22 #include "AMDGPU.h"
23 #include "GCNSubtarget.h"
25 #include "llvm/ADT/MapVector.h"
26 #include "llvm/ADT/Statistic.h"
28 #include <optional>
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "si-peephole-sdwa"
33 
34 STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
35 STATISTIC(NumSDWAInstructionsPeepholed,
36  "Number of instruction converted to SDWA.");
37 
38 namespace {
39 
40 class SDWAOperand;
41 class SDWADstOperand;
42 
43 class SIPeepholeSDWA : public MachineFunctionPass {
44 public:
45  using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>;
46 
47 private:
49  const SIRegisterInfo *TRI;
50  const SIInstrInfo *TII;
51 
54  SmallVector<MachineInstr *, 8> ConvertedInstructions;
55 
56  Optional<int64_t> foldToImm(const MachineOperand &Op) const;
57 
58 public:
59  static char ID;
60 
61  SIPeepholeSDWA() : MachineFunctionPass(ID) {
63  }
64 
65  bool runOnMachineFunction(MachineFunction &MF) override;
66  void matchSDWAOperands(MachineBasicBlock &MBB);
67  std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
68  bool isConvertibleToSDWA(MachineInstr &MI, const GCNSubtarget &ST) const;
69  void pseudoOpConvertToVOP2(MachineInstr &MI,
70  const GCNSubtarget &ST) const;
71  bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
72  void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
73 
74  StringRef getPassName() const override { return "SI Peephole SDWA"; }
75 
76  void getAnalysisUsage(AnalysisUsage &AU) const override {
77  AU.setPreservesCFG();
79  }
80 };
81 
82 class SDWAOperand {
83 private:
84  MachineOperand *Target; // Operand that would be used in converted instruction
85  MachineOperand *Replaced; // Operand that would be replace by Target
86 
87 public:
88  SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
89  : Target(TargetOp), Replaced(ReplacedOp) {
90  assert(Target->isReg());
91  assert(Replaced->isReg());
92  }
93 
94  virtual ~SDWAOperand() = default;
95 
96  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
97  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;
98 
99  MachineOperand *getTargetOperand() const { return Target; }
100  MachineOperand *getReplacedOperand() const { return Replaced; }
101  MachineInstr *getParentInst() const { return Target->getParent(); }
102 
103  MachineRegisterInfo *getMRI() const {
104  return &getParentInst()->getParent()->getParent()->getRegInfo();
105  }
106 
107 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
108  virtual void print(raw_ostream& OS) const = 0;
109  void dump() const { print(dbgs()); }
110 #endif
111 };
112 
113 using namespace AMDGPU::SDWA;
114 
115 class SDWASrcOperand : public SDWAOperand {
116 private:
117  SdwaSel SrcSel;
118  bool Abs;
119  bool Neg;
120  bool Sext;
121 
122 public:
123  SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
124  SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
125  bool Sext_ = false)
126  : SDWAOperand(TargetOp, ReplacedOp),
127  SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}
128 
129  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
130  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
131 
132  SdwaSel getSrcSel() const { return SrcSel; }
133  bool getAbs() const { return Abs; }
134  bool getNeg() const { return Neg; }
135  bool getSext() const { return Sext; }
136 
137  uint64_t getSrcMods(const SIInstrInfo *TII,
138  const MachineOperand *SrcOp) const;
139 
140 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
141  void print(raw_ostream& OS) const override;
142 #endif
143 };
144 
145 class SDWADstOperand : public SDWAOperand {
146 private:
147  SdwaSel DstSel;
148  DstUnused DstUn;
149 
150 public:
151 
152  SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
153  SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
154  : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
155 
156  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
157  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
158 
159  SdwaSel getDstSel() const { return DstSel; }
160  DstUnused getDstUnused() const { return DstUn; }
161 
162 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
163  void print(raw_ostream& OS) const override;
164 #endif
165 };
166 
167 class SDWADstPreserveOperand : public SDWADstOperand {
168 private:
169  MachineOperand *Preserve;
170 
171 public:
172  SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
173  MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD)
174  : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),
175  Preserve(PreserveOp) {}
176 
177  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
178 
179  MachineOperand *getPreservedOperand() const { return Preserve; }
180 
181 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
182  void print(raw_ostream& OS) const override;
183 #endif
184 };
185 
186 } // end anonymous namespace
187 
188 INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)
189 
190 char SIPeepholeSDWA::ID = 0;
191 
192 char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;
193 
195  return new SIPeepholeSDWA();
196 }
197 
198 
199 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
201  switch(Sel) {
202  case BYTE_0: OS << "BYTE_0"; break;
203  case BYTE_1: OS << "BYTE_1"; break;
204  case BYTE_2: OS << "BYTE_2"; break;
205  case BYTE_3: OS << "BYTE_3"; break;
206  case WORD_0: OS << "WORD_0"; break;
207  case WORD_1: OS << "WORD_1"; break;
208  case DWORD: OS << "DWORD"; break;
209  }
210  return OS;
211 }
212 
213 static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
214  switch(Un) {
215  case UNUSED_PAD: OS << "UNUSED_PAD"; break;
216  case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
217  case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
218  }
219  return OS;
220 }
221 
223 void SDWASrcOperand::print(raw_ostream& OS) const {
224  OS << "SDWA src: " << *getTargetOperand()
225  << " src_sel:" << getSrcSel()
226  << " abs:" << getAbs() << " neg:" << getNeg()
227  << " sext:" << getSext() << '\n';
228 }
229 
231 void SDWADstOperand::print(raw_ostream& OS) const {
232  OS << "SDWA dst: " << *getTargetOperand()
233  << " dst_sel:" << getDstSel()
234  << " dst_unused:" << getDstUnused() << '\n';
235 }
236 
239  OS << "SDWA preserve dst: " << *getTargetOperand()
240  << " dst_sel:" << getDstSel()
241  << " preserve:" << *getPreservedOperand() << '\n';
242 }
243 
244 #endif
245 
247  assert(To.isReg() && From.isReg());
248  To.setReg(From.getReg());
249  To.setSubReg(From.getSubReg());
250  To.setIsUndef(From.isUndef());
251  if (To.isUse()) {
252  To.setIsKill(From.isKill());
253  } else {
254  To.setIsDead(From.isDead());
255  }
256 }
257 
258 static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
259  return LHS.isReg() &&
260  RHS.isReg() &&
261  LHS.getReg() == RHS.getReg() &&
262  LHS.getSubReg() == RHS.getSubReg();
263 }
264 
266  const MachineRegisterInfo *MRI) {
267  if (!Reg->isReg() || !Reg->isDef())
268  return nullptr;
269 
270  MachineOperand *ResMO = nullptr;
271  for (MachineOperand &UseMO : MRI->use_nodbg_operands(Reg->getReg())) {
272  // If there exist use of subreg of Reg then return nullptr
273  if (!isSameReg(UseMO, *Reg))
274  return nullptr;
275 
276  // Check that there is only one instruction that uses Reg
277  if (!ResMO) {
278  ResMO = &UseMO;
279  } else if (ResMO->getParent() != UseMO.getParent()) {
280  return nullptr;
281  }
282  }
283 
284  return ResMO;
285 }
286 
288  const MachineRegisterInfo *MRI) {
289  if (!Reg->isReg())
290  return nullptr;
291 
292  MachineInstr *DefInstr = MRI->getUniqueVRegDef(Reg->getReg());
293  if (!DefInstr)
294  return nullptr;
295 
296  for (auto &DefMO : DefInstr->defs()) {
297  if (DefMO.isReg() && DefMO.getReg() == Reg->getReg())
298  return &DefMO;
299  }
300 
301  // Ignore implicit defs.
302  return nullptr;
303 }
304 
305 uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
306  const MachineOperand *SrcOp) const {
307  uint64_t Mods = 0;
308  const auto *MI = SrcOp->getParent();
309  if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
310  if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
311  Mods = Mod->getImm();
312  }
313  } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
314  if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
315  Mods = Mod->getImm();
316  }
317  }
318  if (Abs || Neg) {
319  assert(!Sext &&
320  "Float and integer src modifiers can't be set simultaneously");
321  Mods |= Abs ? SISrcMods::ABS : 0u;
322  Mods ^= Neg ? SISrcMods::NEG : 0u;
323  } else if (Sext) {
324  Mods |= SISrcMods::SEXT;
325  }
326 
327  return Mods;
328 }
329 
330 MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
331  // For SDWA src operand potential instruction is one that use register
332  // defined by parent instruction
333  MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI());
334  if (!PotentialMO)
335  return nullptr;
336 
337  return PotentialMO->getParent();
338 }
339 
340 bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
341  // Find operand in instruction that matches source operand and replace it with
342  // target operand. Set corresponding src_sel
343  bool IsPreserveSrc = false;
344  MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
345  MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
346  MachineOperand *SrcMods =
347  TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
348  assert(Src && (Src->isReg() || Src->isImm()));
349  if (!isSameReg(*Src, *getReplacedOperand())) {
350  // If this is not src0 then it could be src1
351  Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
352  SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
353  SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
354 
355  if (!Src ||
356  !isSameReg(*Src, *getReplacedOperand())) {
357  // It's possible this Src is a tied operand for
358  // UNUSED_PRESERVE, in which case we can either
359  // abandon the peephole attempt, or if legal we can
360  // copy the target operand into the tied slot
361  // if the preserve operation will effectively cause the same
362  // result by overwriting the rest of the dst.
363  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
365  TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
366 
367  if (Dst &&
369  // This will work if the tied src is accessing WORD_0, and the dst is
370  // writing WORD_1. Modifiers don't matter because all the bits that
371  // would be impacted are being overwritten by the dst.
372  // Any other case will not work.
373  SdwaSel DstSel = static_cast<SdwaSel>(
374  TII->getNamedImmOperand(MI, AMDGPU::OpName::dst_sel));
375  if (DstSel == AMDGPU::SDWA::SdwaSel::WORD_1 &&
376  getSrcSel() == AMDGPU::SDWA::SdwaSel::WORD_0) {
377  IsPreserveSrc = true;
378  auto DstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
379  AMDGPU::OpName::vdst);
380  auto TiedIdx = MI.findTiedOperandIdx(DstIdx);
381  Src = &MI.getOperand(TiedIdx);
382  SrcSel = nullptr;
383  SrcMods = nullptr;
384  } else {
385  // Not legal to convert this src
386  return false;
387  }
388  }
389  }
390  assert(Src && Src->isReg());
391 
392  if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
393  MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
394  MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
395  MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
396  !isSameReg(*Src, *getReplacedOperand())) {
397  // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
398  // src2. This is not allowed.
399  return false;
400  }
401 
402  assert(isSameReg(*Src, *getReplacedOperand()) &&
403  (IsPreserveSrc || (SrcSel && SrcMods)));
404  }
405  copyRegOperand(*Src, *getTargetOperand());
406  if (!IsPreserveSrc) {
407  SrcSel->setImm(getSrcSel());
408  SrcMods->setImm(getSrcMods(TII, Src));
409  }
410  getTargetOperand()->setIsKill(false);
411  return true;
412 }
413 
414 MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
415  // For SDWA dst operand potential instruction is one that defines register
416  // that this operand uses
417  MachineRegisterInfo *MRI = getMRI();
418  MachineInstr *ParentMI = getParentInst();
419 
420  MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI);
421  if (!PotentialMO)
422  return nullptr;
423 
424  // Check that ParentMI is the only instruction that uses replaced register
425  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) {
426  if (&UseInst != ParentMI)
427  return nullptr;
428  }
429 
430  return PotentialMO->getParent();
431 }
432 
433 bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
434  // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
435 
436  if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
437  MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
438  MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
439  MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
440  getDstSel() != AMDGPU::SDWA::DWORD) {
441  // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
442  return false;
443  }
444 
445  MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
446  assert(Operand &&
447  Operand->isReg() &&
448  isSameReg(*Operand, *getReplacedOperand()));
449  copyRegOperand(*Operand, *getTargetOperand());
450  MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
451  assert(DstSel);
452  DstSel->setImm(getDstSel());
453  MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
454  assert(DstUnused);
455  DstUnused->setImm(getDstUnused());
456 
457  // Remove original instruction because it would conflict with our new
458  // instruction by register definition
459  getParentInst()->eraseFromParent();
460  return true;
461 }
462 
463 bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
464  const SIInstrInfo *TII) {
465  // MI should be moved right before v_or_b32.
466  // For this we should clear all kill flags on uses of MI src-operands or else
467  // we can encounter problem with use of killed operand.
468  for (MachineOperand &MO : MI.uses()) {
469  if (!MO.isReg())
470  continue;
471  getMRI()->clearKillFlags(MO.getReg());
472  }
473 
474  // Move MI before v_or_b32
475  auto MBB = MI.getParent();
476  MBB->remove(&MI);
477  MBB->insert(getParentInst(), &MI);
478 
479  // Add Implicit use of preserved register
481  MIB.addReg(getPreservedOperand()->getReg(),
483  getPreservedOperand()->getSubReg());
484 
485  // Tie dst to implicit use
486  MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst),
487  MI.getNumOperands() - 1);
488 
489  // Convert MI as any other SDWADstOperand and remove v_or_b32
490  return SDWADstOperand::convertToSDWA(MI, TII);
491 }
492 
493 Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
494  if (Op.isImm()) {
495  return Op.getImm();
496  }
497 
498  // If this is not immediate then it can be copy of immediate value, e.g.:
499  // %1 = S_MOV_B32 255;
500  if (Op.isReg()) {
501  for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
502  if (!isSameReg(Op, Def))
503  continue;
504 
505  const MachineInstr *DefInst = Def.getParent();
506  if (!TII->isFoldableCopy(*DefInst))
507  return None;
508 
509  const MachineOperand &Copied = DefInst->getOperand(1);
510  if (!Copied.isImm())
511  return None;
512 
513  return Copied.getImm();
514  }
515  }
516 
517  return None;
518 }
519 
520 std::unique_ptr<SDWAOperand>
521 SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
522  unsigned Opcode = MI.getOpcode();
523  switch (Opcode) {
524  case AMDGPU::V_LSHRREV_B32_e32:
525  case AMDGPU::V_ASHRREV_I32_e32:
526  case AMDGPU::V_LSHLREV_B32_e32:
527  case AMDGPU::V_LSHRREV_B32_e64:
528  case AMDGPU::V_ASHRREV_I32_e64:
529  case AMDGPU::V_LSHLREV_B32_e64: {
530  // from: v_lshrrev_b32_e32 v1, 16/24, v0
531  // to SDWA src:v0 src_sel:WORD_1/BYTE_3
532 
533  // from: v_ashrrev_i32_e32 v1, 16/24, v0
534  // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1
535 
536  // from: v_lshlrev_b32_e32 v1, 16/24, v0
537  // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
538  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
539  auto Imm = foldToImm(*Src0);
540  if (!Imm)
541  break;
542 
543  if (*Imm != 16 && *Imm != 24)
544  break;
545 
546  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
547  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
548  if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
549  break;
550 
551  if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
552  Opcode == AMDGPU::V_LSHLREV_B32_e64) {
553  return std::make_unique<SDWADstOperand>(
554  Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
555  } else {
556  return std::make_unique<SDWASrcOperand>(
557  Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
558  Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
559  Opcode != AMDGPU::V_LSHRREV_B32_e64);
560  }
561  break;
562  }
563 
564  case AMDGPU::V_LSHRREV_B16_e32:
565  case AMDGPU::V_ASHRREV_I16_e32:
566  case AMDGPU::V_LSHLREV_B16_e32:
567  case AMDGPU::V_LSHRREV_B16_e64:
568  case AMDGPU::V_ASHRREV_I16_e64:
569  case AMDGPU::V_LSHLREV_B16_e64: {
570  // from: v_lshrrev_b16_e32 v1, 8, v0
571  // to SDWA src:v0 src_sel:BYTE_1
572 
573  // from: v_ashrrev_i16_e32 v1, 8, v0
574  // to SDWA src:v0 src_sel:BYTE_1 sext:1
575 
576  // from: v_lshlrev_b16_e32 v1, 8, v0
577  // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
578  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
579  auto Imm = foldToImm(*Src0);
580  if (!Imm || *Imm != 8)
581  break;
582 
583  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
584  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
585 
586  if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
587  break;
588 
589  if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
590  Opcode == AMDGPU::V_LSHLREV_B16_e64) {
591  return std::make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
592  } else {
593  return std::make_unique<SDWASrcOperand>(
594  Src1, Dst, BYTE_1, false, false,
595  Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
596  Opcode != AMDGPU::V_LSHRREV_B16_e64);
597  }
598  break;
599  }
600 
601  case AMDGPU::V_BFE_I32_e64:
602  case AMDGPU::V_BFE_U32_e64: {
603  // e.g.:
604  // from: v_bfe_u32 v1, v0, 8, 8
605  // to SDWA src:v0 src_sel:BYTE_1
606 
607  // offset | width | src_sel
608  // ------------------------
609  // 0 | 8 | BYTE_0
610  // 0 | 16 | WORD_0
611  // 0 | 32 | DWORD ?
612  // 8 | 8 | BYTE_1
613  // 16 | 8 | BYTE_2
614  // 16 | 16 | WORD_1
615  // 24 | 8 | BYTE_3
616 
617  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
618  auto Offset = foldToImm(*Src1);
619  if (!Offset)
620  break;
621 
622  MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
623  auto Width = foldToImm(*Src2);
624  if (!Width)
625  break;
626 
627  SdwaSel SrcSel = DWORD;
628 
629  if (*Offset == 0 && *Width == 8)
630  SrcSel = BYTE_0;
631  else if (*Offset == 0 && *Width == 16)
632  SrcSel = WORD_0;
633  else if (*Offset == 0 && *Width == 32)
634  SrcSel = DWORD;
635  else if (*Offset == 8 && *Width == 8)
636  SrcSel = BYTE_1;
637  else if (*Offset == 16 && *Width == 8)
638  SrcSel = BYTE_2;
639  else if (*Offset == 16 && *Width == 16)
640  SrcSel = WORD_1;
641  else if (*Offset == 24 && *Width == 8)
642  SrcSel = BYTE_3;
643  else
644  break;
645 
646  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
647  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
648 
649  if (Src0->getReg().isPhysical() || Dst->getReg().isPhysical())
650  break;
651 
652  return std::make_unique<SDWASrcOperand>(
653  Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32_e64);
654  }
655 
656  case AMDGPU::V_AND_B32_e32:
657  case AMDGPU::V_AND_B32_e64: {
658  // e.g.:
659  // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
660  // to SDWA src:v0 src_sel:WORD_0/BYTE_0
661 
662  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
663  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
664  auto ValSrc = Src1;
665  auto Imm = foldToImm(*Src0);
666 
667  if (!Imm) {
668  Imm = foldToImm(*Src1);
669  ValSrc = Src0;
670  }
671 
672  if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
673  break;
674 
675  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
676 
677  if (ValSrc->getReg().isPhysical() || Dst->getReg().isPhysical())
678  break;
679 
680  return std::make_unique<SDWASrcOperand>(
681  ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
682  }
683 
684  case AMDGPU::V_OR_B32_e32:
685  case AMDGPU::V_OR_B32_e64: {
686  // Patterns for dst_unused:UNUSED_PRESERVE.
687  // e.g., from:
688  // v_add_f16_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD
689  // src1_sel:WORD_1 src2_sel:WORD1
690  // v_add_f16_e32 v3, v1, v2
691  // v_or_b32_e32 v4, v0, v3
692  // to SDWA preserve dst:v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE preserve:v3
693 
694  // Check if one of operands of v_or_b32 is SDWA instruction
695  using CheckRetType =
696  std::optional<std::pair<MachineOperand *, MachineOperand *>>;
697  auto CheckOROperandsForSDWA =
698  [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType {
699  if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg())
700  return CheckRetType(None);
701 
702  MachineOperand *Op1Def = findSingleRegDef(Op1, MRI);
703  if (!Op1Def)
704  return CheckRetType(None);
705 
706  MachineInstr *Op1Inst = Op1Def->getParent();
707  if (!TII->isSDWA(*Op1Inst))
708  return CheckRetType(None);
709 
710  MachineOperand *Op2Def = findSingleRegDef(Op2, MRI);
711  if (!Op2Def)
712  return CheckRetType(None);
713 
714  return CheckRetType(std::make_pair(Op1Def, Op2Def));
715  };
716 
717  MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
718  MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
719  assert(OrSDWA && OrOther);
720  auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
721  if (!Res) {
722  OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
723  OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
724  assert(OrSDWA && OrOther);
725  Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
726  if (!Res)
727  break;
728  }
729 
730  MachineOperand *OrSDWADef = Res->first;
731  MachineOperand *OrOtherDef = Res->second;
732  assert(OrSDWADef && OrOtherDef);
733 
734  MachineInstr *SDWAInst = OrSDWADef->getParent();
735  MachineInstr *OtherInst = OrOtherDef->getParent();
736 
737  // Check that OtherInstr is actually bitwise compatible with SDWAInst = their
738  // destination patterns don't overlap. Compatible instruction can be either
739  // regular instruction with compatible bitness or SDWA instruction with
740  // correct dst_sel
741  // SDWAInst | OtherInst bitness / OtherInst dst_sel
742  // -----------------------------------------------------
743  // DWORD | no / no
744  // WORD_0 | no / BYTE_2/3, WORD_1
745  // WORD_1 | 8/16-bit instructions / BYTE_0/1, WORD_0
746  // BYTE_0 | no / BYTE_1/2/3, WORD_1
747  // BYTE_1 | 8-bit / BYTE_0/2/3, WORD_1
748  // BYTE_2 | 8/16-bit / BYTE_0/1/3. WORD_0
749  // BYTE_3 | 8/16/24-bit / BYTE_0/1/2, WORD_0
750  // E.g. if SDWAInst is v_add_f16_sdwa dst_sel:WORD_1 then v_add_f16 is OK
751  // but v_add_f32 is not.
752 
753  // TODO: add support for non-SDWA instructions as OtherInst.
754  // For now this only works with SDWA instructions. For regular instructions
755  // there is no way to determine if the instruction writes only 8/16/24-bit
756  // out of full register size and all registers are at min 32-bit wide.
757  if (!TII->isSDWA(*OtherInst))
758  break;
759 
760  SdwaSel DstSel = static_cast<SdwaSel>(
761  TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));;
762  SdwaSel OtherDstSel = static_cast<SdwaSel>(
763  TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));
764 
765  bool DstSelAgree = false;
766  switch (DstSel) {
767  case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) ||
768  (OtherDstSel == BYTE_3) ||
769  (OtherDstSel == WORD_1));
770  break;
771  case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
772  (OtherDstSel == BYTE_1) ||
773  (OtherDstSel == WORD_0));
774  break;
775  case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) ||
776  (OtherDstSel == BYTE_2) ||
777  (OtherDstSel == BYTE_3) ||
778  (OtherDstSel == WORD_1));
779  break;
780  case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
781  (OtherDstSel == BYTE_2) ||
782  (OtherDstSel == BYTE_3) ||
783  (OtherDstSel == WORD_1));
784  break;
785  case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) ||
786  (OtherDstSel == BYTE_1) ||
787  (OtherDstSel == BYTE_3) ||
788  (OtherDstSel == WORD_0));
789  break;
790  case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) ||
791  (OtherDstSel == BYTE_1) ||
792  (OtherDstSel == BYTE_2) ||
793  (OtherDstSel == WORD_0));
794  break;
795  default: DstSelAgree = false;
796  }
797 
798  if (!DstSelAgree)
799  break;
800 
801  // Also OtherInst dst_unused should be UNUSED_PAD
802  DstUnused OtherDstUnused = static_cast<DstUnused>(
803  TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused));
804  if (OtherDstUnused != DstUnused::UNUSED_PAD)
805  break;
806 
807  // Create DstPreserveOperand
808  MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
809  assert(OrDst && OrDst->isReg());
810 
811  return std::make_unique<SDWADstPreserveOperand>(
812  OrDst, OrSDWADef, OrOtherDef, DstSel);
813 
814  }
815  }
816 
817  return std::unique_ptr<SDWAOperand>(nullptr);
818 }
819 
820 #if !defined(NDEBUG)
821 static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) {
822  Operand.print(OS);
823  return OS;
824 }
825 #endif
826 
827 void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
828  for (MachineInstr &MI : MBB) {
829  if (auto Operand = matchSDWAOperand(MI)) {
830  LLVM_DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n');
831  SDWAOperands[&MI] = std::move(Operand);
832  ++NumSDWAPatternsFound;
833  }
834  }
835 }
836 
837 // Convert the V_ADD_CO_U32_e64 into V_ADD_CO_U32_e32. This allows
838 // isConvertibleToSDWA to perform its transformation on V_ADD_CO_U32_e32 into
839 // V_ADD_CO_U32_sdwa.
840 //
841 // We are transforming from a VOP3 into a VOP2 form of the instruction.
842 // %19:vgpr_32 = V_AND_B32_e32 255,
843 // killed %16:vgpr_32, implicit $exec
844 // %47:vgpr_32, %49:sreg_64_xexec = V_ADD_CO_U32_e64
845 // %26.sub0:vreg_64, %19:vgpr_32, implicit $exec
846 // %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64
847 // %26.sub1:vreg_64, %54:vgpr_32, killed %49:sreg_64_xexec, implicit $exec
848 //
849 // becomes
850 // %47:vgpr_32 = V_ADD_CO_U32_sdwa
851 // 0, %26.sub0:vreg_64, 0, killed %16:vgpr_32, 0, 6, 0, 6, 0,
852 // implicit-def $vcc, implicit $exec
853 // %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64
854 // %26.sub1:vreg_64, %54:vgpr_32, killed $vcc, implicit $exec
855 void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
856  const GCNSubtarget &ST) const {
857  int Opc = MI.getOpcode();
858  assert((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64) &&
859  "Currently only handles V_ADD_CO_U32_e64 or V_SUB_CO_U32_e64");
860 
861  // Can the candidate MI be shrunk?
862  if (!TII->canShrink(MI, *MRI))
863  return;
864  Opc = AMDGPU::getVOPe32(Opc);
865  // Find the related ADD instruction.
866  const MachineOperand *Sdst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
867  if (!Sdst)
868  return;
869  MachineOperand *NextOp = findSingleRegUse(Sdst, MRI);
870  if (!NextOp)
871  return;
872  MachineInstr &MISucc = *NextOp->getParent();
873 
874  // Make sure the carry in/out are subsequently unused.
875  MachineOperand *CarryIn = TII->getNamedOperand(MISucc, AMDGPU::OpName::src2);
876  if (!CarryIn)
877  return;
878  MachineOperand *CarryOut = TII->getNamedOperand(MISucc, AMDGPU::OpName::sdst);
879  if (!CarryOut)
880  return;
881  if (!MRI->hasOneUse(CarryIn->getReg()) || !MRI->use_empty(CarryOut->getReg()))
882  return;
883  // Make sure VCC or its subregs are dead before MI.
884  MachineBasicBlock &MBB = *MI.getParent();
885  auto Liveness = MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
886  if (Liveness != MachineBasicBlock::LQR_Dead)
887  return;
888  // Check if VCC is referenced in range of (MI,MISucc].
889  for (auto I = std::next(MI.getIterator()), E = MISucc.getIterator();
890  I != E; ++I) {
891  if (I->modifiesRegister(AMDGPU::VCC, TRI))
892  return;
893  }
894 
895  // Replace MI with V_{SUB|ADD}_I32_e32
896  BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc))
897  .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
898  .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
899  .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
900  .setMIFlags(MI.getFlags());
901 
902  MI.eraseFromParent();
903 
904  // Since the carry output of MI is now VCC, update its use in MISucc.
905 
906  MISucc.substituteRegister(CarryIn->getReg(), TRI->getVCC(), 0, *TRI);
907 }
908 
909 bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
910  const GCNSubtarget &ST) const {
911  // Check if this is already an SDWA instruction
912  unsigned Opc = MI.getOpcode();
913  if (TII->isSDWA(Opc))
914  return true;
915 
916  // Check if this instruction has opcode that supports SDWA
917  if (AMDGPU::getSDWAOp(Opc) == -1)
918  Opc = AMDGPU::getVOPe32(Opc);
919 
920  if (AMDGPU::getSDWAOp(Opc) == -1)
921  return false;
922 
923  if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
924  return false;
925 
926  if (TII->isVOPC(Opc)) {
927  if (!ST.hasSDWASdst()) {
928  const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
929  if (SDst && (SDst->getReg() != AMDGPU::VCC &&
930  SDst->getReg() != AMDGPU::VCC_LO))
931  return false;
932  }
933 
934  if (!ST.hasSDWAOutModsVOPC() &&
935  (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
936  TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
937  return false;
938 
939  } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
940  !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
941  return false;
942  }
943 
944  if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
945  Opc == AMDGPU::V_FMAC_F32_e32 ||
946  Opc == AMDGPU::V_MAC_F16_e32 ||
947  Opc == AMDGPU::V_MAC_F32_e32))
948  return false;
949 
950  // Check if target supports this SDWA opcode
951  if (TII->pseudoToMCOpcode(Opc) == -1)
952  return false;
953 
954  // FIXME: has SDWA but require handling of implicit VCC use
955  if (Opc == AMDGPU::V_CNDMASK_B32_e32)
956  return false;
957 
958  if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
959  if (!Src0->isReg() && !Src0->isImm())
960  return false;
961  }
962 
963  if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) {
964  if (!Src1->isReg() && !Src1->isImm())
965  return false;
966  }
967 
968  return true;
969 }
970 
971 bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
972  const SDWAOperandsVector &SDWAOperands) {
973 
974  LLVM_DEBUG(dbgs() << "Convert instruction:" << MI);
975 
976  // Convert to sdwa
977  int SDWAOpcode;
978  unsigned Opcode = MI.getOpcode();
979  if (TII->isSDWA(Opcode)) {
980  SDWAOpcode = Opcode;
981  } else {
982  SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
983  if (SDWAOpcode == -1)
984  SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
985  }
986  assert(SDWAOpcode != -1);
987 
988  const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
989 
990  // Create SDWA version of instruction MI and initialize its operands
991  MachineInstrBuilder SDWAInst =
992  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc)
993  .setMIFlags(MI.getFlags());
994 
995  // Copy dst, if it is present in original then should also be present in SDWA
996  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
997  if (Dst) {
998  assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::vdst));
999  SDWAInst.add(*Dst);
1000  } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
1001  assert(Dst && AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::sdst));
1002  SDWAInst.add(*Dst);
1003  } else {
1004  assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::sdst));
1005  SDWAInst.addReg(TRI->getVCC(), RegState::Define);
1006  }
1007 
1008  // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
1009  // src0_modifiers (except for v_nop_sdwa, but it can't get here)
1010  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1011  assert(Src0 && AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0) &&
1012  AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0_modifiers));
1013  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
1014  SDWAInst.addImm(Mod->getImm());
1015  else
1016  SDWAInst.addImm(0);
1017  SDWAInst.add(*Src0);
1018 
1019  // Copy src1 if present, initialize src1_modifiers.
1020  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1021  if (Src1) {
1022  assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1) &&
1023  AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1_modifiers));
1024  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
1025  SDWAInst.addImm(Mod->getImm());
1026  else
1027  SDWAInst.addImm(0);
1028  SDWAInst.add(*Src1);
1029  }
1030 
1031  if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
1032  SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
1033  SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
1034  SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
1035  // v_mac_f16/32 has additional src2 operand tied to vdst
1036  MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1037  assert(Src2);
1038  SDWAInst.add(*Src2);
1039  }
1040 
1041  // Copy clamp if present, initialize otherwise
1042  assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::clamp));
1043  MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
1044  if (Clamp) {
1045  SDWAInst.add(*Clamp);
1046  } else {
1047  SDWAInst.addImm(0);
1048  }
1049 
1050  // Copy omod if present, initialize otherwise if needed
1051  if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::omod)) {
1052  MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
1053  if (OMod) {
1054  SDWAInst.add(*OMod);
1055  } else {
1056  SDWAInst.addImm(0);
1057  }
1058  }
1059 
1060  // Copy dst_sel if present, initialize otherwise if needed
1061  if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::dst_sel)) {
1062  MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
1063  if (DstSel) {
1064  SDWAInst.add(*DstSel);
1065  } else {
1067  }
1068  }
1069 
1070  // Copy dst_unused if present, initialize otherwise if needed
1071  if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::dst_unused)) {
1072  MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
1073  if (DstUnused) {
1074  SDWAInst.add(*DstUnused);
1075  } else {
1077  }
1078  }
1079 
1080  // Copy src0_sel if present, initialize otherwise
1081  assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0_sel));
1082  MachineOperand *Src0Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
1083  if (Src0Sel) {
1084  SDWAInst.add(*Src0Sel);
1085  } else {
1087  }
1088 
1089  // Copy src1_sel if present, initialize otherwise if needed
1090  if (Src1) {
1091  assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1_sel));
1092  MachineOperand *Src1Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
1093  if (Src1Sel) {
1094  SDWAInst.add(*Src1Sel);
1095  } else {
1097  }
1098  }
1099 
1100  // Check for a preserved register that needs to be copied.
1101  auto DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
1102  if (DstUnused &&
1104  // We expect, if we are here, that the instruction was already in it's SDWA form,
1105  // with a tied operand.
1106  assert(Dst && Dst->isTied());
1107  assert(Opcode == static_cast<unsigned int>(SDWAOpcode));
1108  // We also expect a vdst, since sdst can't preserve.
1109  auto PreserveDstIdx = AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst);
1110  assert(PreserveDstIdx != -1);
1111 
1112  auto TiedIdx = MI.findTiedOperandIdx(PreserveDstIdx);
1113  auto Tied = MI.getOperand(TiedIdx);
1114 
1115  SDWAInst.add(Tied);
1116  SDWAInst->tieOperands(PreserveDstIdx, SDWAInst->getNumOperands() - 1);
1117  }
1118 
1119  // Apply all sdwa operand patterns.
1120  bool Converted = false;
1121  for (auto &Operand : SDWAOperands) {
1122  LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand);
1123  // There should be no intersection between SDWA operands and potential MIs
1124  // e.g.:
1125  // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
1126  // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0
1127  // v_add_u32 v3, v4, v2
1128  //
1129  // In that example it is possible that we would fold 2nd instruction into
1130  // 3rd (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that
1131  // was already destroyed). So if SDWAOperand is also a potential MI then do
1132  // not apply it.
1133  if (PotentialMatches.count(Operand->getParentInst()) == 0)
1134  Converted |= Operand->convertToSDWA(*SDWAInst, TII);
1135  }
1136  if (Converted) {
1137  ConvertedInstructions.push_back(SDWAInst);
1138  } else {
1139  SDWAInst->eraseFromParent();
1140  return false;
1141  }
1142 
1143  LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n');
1144  ++NumSDWAInstructionsPeepholed;
1145 
1146  MI.eraseFromParent();
1147  return true;
1148 }
1149 
1150 // If an instruction was converted to SDWA it should not have immediates or SGPR
1151 // operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs.
1152 void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
1153  const GCNSubtarget &ST) const {
1154  const MCInstrDesc &Desc = TII->get(MI.getOpcode());
1155  unsigned ConstantBusCount = 0;
1156  for (MachineOperand &Op : MI.explicit_uses()) {
1157  if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
1158  continue;
1159 
1160  unsigned I = MI.getOperandNo(&Op);
1161  if (Desc.OpInfo[I].RegClass == -1 ||
1162  !TRI->isVSSuperClass(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
1163  continue;
1164 
1165  if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
1166  TRI->isSGPRReg(*MRI, Op.getReg())) {
1167  ++ConstantBusCount;
1168  continue;
1169  }
1170 
1171  Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1172  auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
1173  TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
1174  if (Op.isImm())
1175  Copy.addImm(Op.getImm());
1176  else if (Op.isReg())
1177  Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
1178  Op.getSubReg());
1179  Op.ChangeToRegister(VGPR, false);
1180  }
1181 }
1182 
1183 bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
1184  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1185 
1186  if (!ST.hasSDWA() || skipFunction(MF.getFunction()))
1187  return false;
1188 
1189  MRI = &MF.getRegInfo();
1190  TRI = ST.getRegisterInfo();
1191  TII = ST.getInstrInfo();
1192 
1193  // Find all SDWA operands in MF.
1194  bool Ret = false;
1195  for (MachineBasicBlock &MBB : MF) {
1196  bool Changed = false;
1197  do {
1198  // Preprocess the ADD/SUB pairs so they could be SDWA'ed.
1199  // Look for a possible ADD or SUB that resulted from a previously lowered
1200  // V_{ADD|SUB}_U64_PSEUDO. The function pseudoOpConvertToVOP2
1201  // lowers the pair of instructions into e32 form.
1202  matchSDWAOperands(MBB);
1203  for (const auto &OperandPair : SDWAOperands) {
1204  const auto &Operand = OperandPair.second;
1205  MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
1206  if (PotentialMI &&
1207  (PotentialMI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
1208  PotentialMI->getOpcode() == AMDGPU::V_SUB_CO_U32_e64))
1209  pseudoOpConvertToVOP2(*PotentialMI, ST);
1210  }
1211  SDWAOperands.clear();
1212 
1213  // Generate potential match list.
1214  matchSDWAOperands(MBB);
1215 
1216  for (const auto &OperandPair : SDWAOperands) {
1217  const auto &Operand = OperandPair.second;
1218  MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
1219  if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) {
1220  PotentialMatches[PotentialMI].push_back(Operand.get());
1221  }
1222  }
1223 
1224  for (auto &PotentialPair : PotentialMatches) {
1225  MachineInstr &PotentialMI = *PotentialPair.first;
1226  convertToSDWA(PotentialMI, PotentialPair.second);
1227  }
1228 
1229  PotentialMatches.clear();
1230  SDWAOperands.clear();
1231 
1232  Changed = !ConvertedInstructions.empty();
1233 
1234  if (Changed)
1235  Ret = true;
1236  while (!ConvertedInstructions.empty())
1237  legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
1238  } while (Changed);
1239  }
1240 
1241  return Ret;
1242 }
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
llvm::AMDGPU::SDWA::BYTE_1
@ BYTE_1
Definition: SIDefines.h:767
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:149
llvm::MachineOperand::setIsKill
void setIsKill(bool Val=true)
Definition: MachineOperand.h:509
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::RegState::ImplicitKill
@ ImplicitKill
Definition: MachineInstrBuilder.h:64
Statistic.h
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
MapVector.h
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:407
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::RegState::Define
@ Define
Register definition.
Definition: MachineInstrBuilder.h:44
llvm::MachineInstr::defs
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:678
llvm::MachineRegisterInfo::use_nodbg_instructions
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:551
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:664
llvm::AMDGPU::SDWA::BYTE_0
@ BYTE_0
Definition: SIDefines.h:766
llvm::initializeSIPeepholeSDWAPass
void initializeSIPeepholeSDWAPass(PassRegistry &)
llvm::AMDGPU::SDWA::UNUSED_PRESERVE
@ UNUSED_PRESERVE
Definition: SIDefines.h:778
isSameReg
static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS)
Definition: SIPeepholeSDWA.cpp:258
llvm::Optional< int64_t >
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SIPeepholeSDWA.cpp:32
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::AMDGPU::getNamedOperandIdx
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
llvm::AMDGPU::SDWA::DstUnused
DstUnused
Definition: SIDefines.h:775
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::AMDGPU::SDWA::UNUSED_SEXT
@ UNUSED_SEXT
Definition: SIDefines.h:777
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::Register::isPhysical
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::AMDGPU::hasNamedOperand
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
Definition: AMDGPUBaseInfo.h:303
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:667
llvm::MachineRegisterInfo::use_nodbg_operands
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:534
llvm::AMDGPU::SDWA::UNUSED_PAD
@ UNUSED_PAD
Definition: SIDefines.h:776
llvm::MachineBasicBlock::remove
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
Definition: MachineBasicBlock.h:989
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
getReg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Definition: MipsDisassembler.cpp:517
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:369
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
llvm::MachineOperand::setSubReg
void setSubReg(unsigned subReg)
Definition: MachineOperand.h:480
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::AMDGPU::SDWA::BYTE_2
@ BYTE_2
Definition: SIDefines.h:768
llvm::AMDGPU::getVOPe32
LLVM_READONLY int getVOPe32(uint16_t Opcode)
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::MachineInstr::substituteRegister
void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
Definition: MachineInstr.cpp:1204
llvm::createSIPeepholeSDWAPass
FunctionPass * createSIPeepholeSDWAPass()
Definition: SIPeepholeSDWA.cpp:194
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:291
llvm::SISrcMods::ABS
@ ABS
Definition: SIDefines.h:220
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::MCOperandInfo::RegClass
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:90
llvm::AMDGPU::getSDWAOp
LLVM_READONLY int getSDWAOp(uint16_t Opcode)
llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition: MachineOperand.h:237
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:514
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
AMDGPUMCTargetDesc.h
llvm::MachineOperand::setIsDead
void setIsDead(bool Val=true)
Definition: MachineOperand.h:515
llvm::TargetRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
Definition: TargetRegisterInfo.h:771
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:416
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
uint64_t
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::SIPeepholeSDWAID
char & SIPeepholeSDWAID
Definition: SIPeepholeSDWA.cpp:192
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::codeview::FrameCookieKind::Copy
@ Copy
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::AMDGPU::SDWA::DWORD
@ DWORD
Definition: SIDefines.h:772
MachineFunctionPass.h
llvm::SISrcMods::SEXT
@ SEXT
Definition: SIDefines.h:221
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
copyRegOperand
static void copyRegOperand(MachineOperand &To, const MachineOperand &From)
Definition: SIPeepholeSDWA.cpp:246
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:261
llvm::MCInstrDesc::OpInfo
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:208
findSingleRegUse
static MachineOperand * findSingleRegUse(const MachineOperand *Reg, const MachineRegisterInfo *MRI)
Definition: SIPeepholeSDWA.cpp:265
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::AMDGPU::SDWA::SdwaSel
SdwaSel
Definition: SIDefines.h:765
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::MachineBasicBlock::LQR_Dead
@ LQR_Dead
Register is known to be fully dead.
Definition: MachineBasicBlock.h:1078
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::print
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Definition: GCNRegPressure.cpp:138
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
llvm::MachineOperand::setIsUndef
void setIsUndef(bool Val=true)
Definition: MachineOperand.h:520
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineRegisterInfo::def_operands
iterator_range< def_iterator > def_operands(Register Reg) const
Definition: MachineRegisterInfo.h:397
llvm::AMDGPU::SDWA::WORD_1
@ WORD_1
Definition: SIDefines.h:771
llvm::MapVector::count
size_type count(const KeyT &Key) const
Definition: MapVector.h:143
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
SDWA
@ SDWA
Definition: SIInstrInfo.cpp:7884
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1327
llvm::MachineBasicBlock::computeRegisterLiveness
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
Definition: MachineBasicBlock.cpp:1500
findSingleRegDef
static MachineOperand * findSingleRegDef(const MachineOperand *Reg, const MachineRegisterInfo *MRI)
Definition: SIPeepholeSDWA.cpp:287
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:322
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:439
llvm::SISrcMods::NEG
@ NEG
Definition: SIDefines.h:219
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
llvm::AMDGPU::SDWA::WORD_0
@ WORD_0
Definition: SIDefines.h:770
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
llvm::MachineRegisterInfo::hasOneUse
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
Definition: MachineRegisterInfo.h:518
llvm::MachineInstrBuilder::setMIFlags
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Definition: MachineInstrBuilder.h:273
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:56
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:519
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
From
BlockVerifier::State From
Definition: BlockVerifier.cpp:55
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
llvm::AMDGPU::SDWA::BYTE_3
@ BYTE_3
Definition: SIDefines.h:769
llvm::MachineInstr::tieOperands
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
Definition: MachineInstr.cpp:1094
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:692
llvm::SrcOp
Definition: MachineIRBuilder.h:128