LLVM  13.0.0git
SIPeepholeSDWA.cpp
Go to the documentation of this file.
1 //===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass tries to apply several peephole SDWA patterns.
10 ///
11 /// E.g. original:
12 /// V_LSHRREV_B32_e32 %0, 16, %1
13 /// V_ADD_CO_U32_e32 %2, %0, %3
14 /// V_LSHLREV_B32_e32 %4, 16, %2
15 ///
16 /// Replace:
17 /// V_ADD_CO_U32_sdwa %4, %1, %3
18 /// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
19 ///
20 //===----------------------------------------------------------------------===//
21 
22 #include "AMDGPU.h"
23 #include "GCNSubtarget.h"
25 #include "llvm/ADT/MapVector.h"
26 #include "llvm/ADT/Statistic.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "si-peephole-sdwa"
32 
33 STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
34 STATISTIC(NumSDWAInstructionsPeepholed,
35  "Number of instruction converted to SDWA.");
36 
37 namespace {
38 
39 class SDWAOperand;
40 class SDWADstOperand;
41 
// Machine-function pass that recognizes shift/mask idioms around VOP1/VOP2/
// VOPC instructions and rewrites them into SDWA (sub-dword addressing) forms.
42 class SIPeepholeSDWA : public MachineFunctionPass {
43 public:
44  using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>;
45 
46 private:
// NOTE(review): the extraction dropped original lines 47 and 51-52 here —
// presumably the MachineRegisterInfo *MRI member and the match maps that
// matchSDWAOperands()/foldToImm() below rely on; restore from upstream.
48  const SIRegisterInfo *TRI;
49  const SIInstrInfo *TII;
50 
// Instructions already rewritten to SDWA form; kept for later legalization.
53  SmallVector<MachineInstr *, 8> ConvertedInstructions;
54 
// Fold Op to an integer if it is an immediate or a foldable copy of one.
55  Optional<int64_t> foldToImm(const MachineOperand &Op) const;
56 
57 public:
58  static char ID;
59 
60  SIPeepholeSDWA() : MachineFunctionPass(ID) {
// NOTE(review): original line 61 is missing here — likely the
// initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry()) call.
62  }
63 
64  bool runOnMachineFunction(MachineFunction &MF) override;
// Scan MBB and record every instruction that matches an SDWA source/dest
// pattern (see matchSDWAOperand for the recognized idioms).
65  void matchSDWAOperands(MachineBasicBlock &MBB);
66  std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
67  bool isConvertibleToSDWA(MachineInstr &MI, const GCNSubtarget &ST) const;
68  void pseudoOpConvertToVOP2(MachineInstr &MI,
69  const GCNSubtarget &ST) const;
70  bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
71  void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
72 
73  StringRef getPassName() const override { return "SI Peephole SDWA"; }
74 
75  void getAnalysisUsage(AnalysisUsage &AU) const override {
76  AU.setPreservesCFG();
// NOTE(review): original line 77 is missing — presumably
// MachineFunctionPass::getAnalysisUsage(AU);
78  }
79 };
80 
81 class SDWAOperand {
82 private:
83  MachineOperand *Target; // Operand that would be used in converted instruction
84  MachineOperand *Replaced; // Operand that would be replace by Target
85 
86 public:
87  SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
88  : Target(TargetOp), Replaced(ReplacedOp) {
89  assert(Target->isReg());
90  assert(Replaced->isReg());
91  }
92 
93  virtual ~SDWAOperand() = default;
94 
95  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
96  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;
97 
98  MachineOperand *getTargetOperand() const { return Target; }
99  MachineOperand *getReplacedOperand() const { return Replaced; }
100  MachineInstr *getParentInst() const { return Target->getParent(); }
101 
102  MachineRegisterInfo *getMRI() const {
103  return &getParentInst()->getParent()->getParent()->getRegInfo();
104  }
105 
106 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
107  virtual void print(raw_ostream& OS) const = 0;
108  void dump() const { print(dbgs()); }
109 #endif
110 };
111 
112 using namespace AMDGPU::SDWA;
113 
114 class SDWASrcOperand : public SDWAOperand {
115 private:
116  SdwaSel SrcSel;
117  bool Abs;
118  bool Neg;
119  bool Sext;
120 
121 public:
122  SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
123  SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
124  bool Sext_ = false)
125  : SDWAOperand(TargetOp, ReplacedOp),
126  SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}
127 
128  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
129  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
130 
131  SdwaSel getSrcSel() const { return SrcSel; }
132  bool getAbs() const { return Abs; }
133  bool getNeg() const { return Neg; }
134  bool getSext() const { return Sext; }
135 
136  uint64_t getSrcMods(const SIInstrInfo *TII,
137  const MachineOperand *SrcOp) const;
138 
139 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
140  void print(raw_ostream& OS) const override;
141 #endif
142 };
143 
144 class SDWADstOperand : public SDWAOperand {
145 private:
146  SdwaSel DstSel;
147  DstUnused DstUn;
148 
149 public:
150 
151  SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
152  SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
153  : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
154 
155  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
156  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
157 
158  SdwaSel getDstSel() const { return DstSel; }
159  DstUnused getDstUnused() const { return DstUn; }
160 
161 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
162  void print(raw_ostream& OS) const override;
163 #endif
164 };
165 
166 class SDWADstPreserveOperand : public SDWADstOperand {
167 private:
168  MachineOperand *Preserve;
169 
170 public:
171  SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
172  MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD)
173  : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),
174  Preserve(PreserveOp) {}
175 
176  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
177 
178  MachineOperand *getPreservedOperand() const { return Preserve; }
179 
180 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
181  void print(raw_ostream& OS) const override;
182 #endif
183 };
184 
185 } // end anonymous namespace
186 
187 INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)
188 
189 char SIPeepholeSDWA::ID = 0;
190 
191 char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;
192 
194  return new SIPeepholeSDWA();
195 }
196 
197 
198 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
200  switch(Sel) {
201  case BYTE_0: OS << "BYTE_0"; break;
202  case BYTE_1: OS << "BYTE_1"; break;
203  case BYTE_2: OS << "BYTE_2"; break;
204  case BYTE_3: OS << "BYTE_3"; break;
205  case WORD_0: OS << "WORD_0"; break;
206  case WORD_1: OS << "WORD_1"; break;
207  case DWORD: OS << "DWORD"; break;
208  }
209  return OS;
210 }
211 
212 static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
213  switch(Un) {
214  case UNUSED_PAD: OS << "UNUSED_PAD"; break;
215  case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
216  case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
217  }
218  return OS;
219 }
220 
222 void SDWASrcOperand::print(raw_ostream& OS) const {
223  OS << "SDWA src: " << *getTargetOperand()
224  << " src_sel:" << getSrcSel()
225  << " abs:" << getAbs() << " neg:" << getNeg()
226  << " sext:" << getSext() << '\n';
227 }
228 
230 void SDWADstOperand::print(raw_ostream& OS) const {
231  OS << "SDWA dst: " << *getTargetOperand()
232  << " dst_sel:" << getDstSel()
233  << " dst_unused:" << getDstUnused() << '\n';
234 }
235 
238  OS << "SDWA preserve dst: " << *getTargetOperand()
239  << " dst_sel:" << getDstSel()
240  << " preserve:" << *getPreservedOperand() << '\n';
241 }
242 
243 #endif
244 
246  assert(To.isReg() && From.isReg());
247  To.setReg(From.getReg());
248  To.setSubReg(From.getSubReg());
249  To.setIsUndef(From.isUndef());
250  if (To.isUse()) {
251  To.setIsKill(From.isKill());
252  } else {
253  To.setIsDead(From.isDead());
254  }
255 }
256 
257 static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
258  return LHS.isReg() &&
259  RHS.isReg() &&
260  LHS.getReg() == RHS.getReg() &&
261  LHS.getSubReg() == RHS.getSubReg();
262 }
263 
265  const MachineRegisterInfo *MRI) {
266  if (!Reg->isReg() || !Reg->isDef())
267  return nullptr;
268 
269  MachineOperand *ResMO = nullptr;
270  for (MachineOperand &UseMO : MRI->use_nodbg_operands(Reg->getReg())) {
271  // If there exist use of subreg of Reg then return nullptr
272  if (!isSameReg(UseMO, *Reg))
273  return nullptr;
274 
275  // Check that there is only one instruction that uses Reg
276  if (!ResMO) {
277  ResMO = &UseMO;
278  } else if (ResMO->getParent() != UseMO.getParent()) {
279  return nullptr;
280  }
281  }
282 
283  return ResMO;
284 }
285 
287  const MachineRegisterInfo *MRI) {
288  if (!Reg->isReg())
289  return nullptr;
290 
291  MachineInstr *DefInstr = MRI->getUniqueVRegDef(Reg->getReg());
292  if (!DefInstr)
293  return nullptr;
294 
295  for (auto &DefMO : DefInstr->defs()) {
296  if (DefMO.isReg() && DefMO.getReg() == Reg->getReg())
297  return &DefMO;
298  }
299 
300  // Ignore implicit defs.
301  return nullptr;
302 }
303 
304 uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
305  const MachineOperand *SrcOp) const {
306  uint64_t Mods = 0;
307  const auto *MI = SrcOp->getParent();
308  if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
309  if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
310  Mods = Mod->getImm();
311  }
312  } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
313  if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
314  Mods = Mod->getImm();
315  }
316  }
317  if (Abs || Neg) {
318  assert(!Sext &&
319  "Float and integer src modifiers can't be set simulteniously");
320  Mods |= Abs ? SISrcMods::ABS : 0u;
321  Mods ^= Neg ? SISrcMods::NEG : 0u;
322  } else if (Sext) {
323  Mods |= SISrcMods::SEXT;
324  }
325 
326  return Mods;
327 }
328 
329 MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
330  // For SDWA src operand potential instruction is one that use register
331  // defined by parent instruction
332  MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI());
333  if (!PotentialMO)
334  return nullptr;
335 
336  return PotentialMO->getParent();
337 }
338 
339 bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
340  // Find operand in instruction that matches source operand and replace it with
341  // target operand. Set corresponding src_sel
342  bool IsPreserveSrc = false;
343  MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
344  MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
345  MachineOperand *SrcMods =
346  TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
347  assert(Src && (Src->isReg() || Src->isImm()));
348  if (!isSameReg(*Src, *getReplacedOperand())) {
349  // If this is not src0 then it could be src1
350  Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
351  SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
352  SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
353 
354  if (!Src ||
355  !isSameReg(*Src, *getReplacedOperand())) {
356  // It's possible this Src is a tied operand for
357  // UNUSED_PRESERVE, in which case we can either
358  // abandon the peephole attempt, or if legal we can
359  // copy the target operand into the tied slot
360  // if the preserve operation will effectively cause the same
361  // result by overwriting the rest of the dst.
362  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
364  TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
365 
366  if (Dst &&
368  // This will work if the tied src is acessing WORD_0, and the dst is
369  // writing WORD_1. Modifiers don't matter because all the bits that
370  // would be impacted are being overwritten by the dst.
371  // Any other case will not work.
372  SdwaSel DstSel = static_cast<SdwaSel>(
373  TII->getNamedImmOperand(MI, AMDGPU::OpName::dst_sel));
374  if (DstSel == AMDGPU::SDWA::SdwaSel::WORD_1 &&
375  getSrcSel() == AMDGPU::SDWA::SdwaSel::WORD_0) {
376  IsPreserveSrc = true;
377  auto DstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
378  AMDGPU::OpName::vdst);
379  auto TiedIdx = MI.findTiedOperandIdx(DstIdx);
380  Src = &MI.getOperand(TiedIdx);
381  SrcSel = nullptr;
382  SrcMods = nullptr;
383  } else {
384  // Not legal to convert this src
385  return false;
386  }
387  }
388  }
389  assert(Src && Src->isReg());
390 
391  if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
392  MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
393  MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
394  MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
395  !isSameReg(*Src, *getReplacedOperand())) {
396  // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
397  // src2. This is not allowed.
398  return false;
399  }
400 
401  assert(isSameReg(*Src, *getReplacedOperand()) &&
402  (IsPreserveSrc || (SrcSel && SrcMods)));
403  }
404  copyRegOperand(*Src, *getTargetOperand());
405  if (!IsPreserveSrc) {
406  SrcSel->setImm(getSrcSel());
407  SrcMods->setImm(getSrcMods(TII, Src));
408  }
409  getTargetOperand()->setIsKill(false);
410  return true;
411 }
412 
413 MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
414  // For SDWA dst operand potential instruction is one that defines register
415  // that this operand uses
416  MachineRegisterInfo *MRI = getMRI();
417  MachineInstr *ParentMI = getParentInst();
418 
419  MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI);
420  if (!PotentialMO)
421  return nullptr;
422 
423  // Check that ParentMI is the only instruction that uses replaced register
424  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) {
425  if (&UseInst != ParentMI)
426  return nullptr;
427  }
428 
429  return PotentialMO->getParent();
430 }
431 
432 bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
433  // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
434 
435  if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
436  MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
437  MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
438  MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
439  getDstSel() != AMDGPU::SDWA::DWORD) {
440  // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
441  return false;
442  }
443 
444  MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
445  assert(Operand &&
446  Operand->isReg() &&
447  isSameReg(*Operand, *getReplacedOperand()));
448  copyRegOperand(*Operand, *getTargetOperand());
449  MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
450  assert(DstSel);
451  DstSel->setImm(getDstSel());
452  MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
453  assert(DstUnused);
454  DstUnused->setImm(getDstUnused());
455 
456  // Remove original instruction because it would conflict with our new
457  // instruction by register definition
458  getParentInst()->eraseFromParent();
459  return true;
460 }
461 
// Convert an UNUSED_PRESERVE destination pattern: move the SDWA instruction
// down to the v_or_b32, tie the preserved register to its vdst as an
// implicit use, then convert it like a plain SDWADstOperand (which also
// erases the v_or_b32).
462 bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
463  const SIInstrInfo *TII) {
464  // MI should be moved right before v_or_b32.
465  // For this we should clear all kill flags on uses of MI src-operands or else
466  // we can encounter problem with use of killed operand.
467  for (MachineOperand &MO : MI.uses()) {
468  if (!MO.isReg())
469  continue;
470  getMRI()->clearKillFlags(MO.getReg());
471  }
472 
473  // Move MI before v_or_b32
474  auto MBB = MI.getParent();
475  MBB->remove(&MI);
476  MBB->insert(getParentInst(), &MI);
477 
478  // Add Implicit use of preserved register
// NOTE(review): the extraction dropped original lines 479 and 481 here —
// presumably the "MachineInstrBuilder MIB(MI);" construction and the
// register-state flags argument of addReg (implicit-use state); restore
// from upstream before building.
480  MIB.addReg(getPreservedOperand()->getReg(),
482  getPreservedOperand()->getSubReg());
483 
484  // Tie dst to implicit use
485  MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst),
486  MI.getNumOperands() - 1);
487 
488  // Convert MI as any other SDWADstOperand and remove v_or_b32
489  return SDWADstOperand::convertToSDWA(MI, TII);
490 }
491 
492 Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
493  if (Op.isImm()) {
494  return Op.getImm();
495  }
496 
497  // If this is not immediate then it can be copy of immediate value, e.g.:
498  // %1 = S_MOV_B32 255;
499  if (Op.isReg()) {
500  for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
501  if (!isSameReg(Op, Def))
502  continue;
503 
504  const MachineInstr *DefInst = Def.getParent();
505  if (!TII->isFoldableCopy(*DefInst))
506  return None;
507 
508  const MachineOperand &Copied = DefInst->getOperand(1);
509  if (!Copied.isImm())
510  return None;
511 
512  return Copied.getImm();
513  }
514  }
515 
516  return None;
517 }
518 
519 std::unique_ptr<SDWAOperand>
520 SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
521  unsigned Opcode = MI.getOpcode();
522  switch (Opcode) {
523  case AMDGPU::V_LSHRREV_B32_e32:
524  case AMDGPU::V_ASHRREV_I32_e32:
525  case AMDGPU::V_LSHLREV_B32_e32:
526  case AMDGPU::V_LSHRREV_B32_e64:
527  case AMDGPU::V_ASHRREV_I32_e64:
528  case AMDGPU::V_LSHLREV_B32_e64: {
529  // from: v_lshrrev_b32_e32 v1, 16/24, v0
530  // to SDWA src:v0 src_sel:WORD_1/BYTE_3
531 
532  // from: v_ashrrev_i32_e32 v1, 16/24, v0
533  // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1
534 
535  // from: v_lshlrev_b32_e32 v1, 16/24, v0
536  // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
537  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
538  auto Imm = foldToImm(*Src0);
539  if (!Imm)
540  break;
541 
542  if (*Imm != 16 && *Imm != 24)
543  break;
544 
545  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
546  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
547  if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
548  break;
549 
550  if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
551  Opcode == AMDGPU::V_LSHLREV_B32_e64) {
552  return std::make_unique<SDWADstOperand>(
553  Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
554  } else {
555  return std::make_unique<SDWASrcOperand>(
556  Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
557  Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
558  Opcode != AMDGPU::V_LSHRREV_B32_e64);
559  }
560  break;
561  }
562 
563  case AMDGPU::V_LSHRREV_B16_e32:
564  case AMDGPU::V_ASHRREV_I16_e32:
565  case AMDGPU::V_LSHLREV_B16_e32:
566  case AMDGPU::V_LSHRREV_B16_e64:
567  case AMDGPU::V_ASHRREV_I16_e64:
568  case AMDGPU::V_LSHLREV_B16_e64: {
569  // from: v_lshrrev_b16_e32 v1, 8, v0
570  // to SDWA src:v0 src_sel:BYTE_1
571 
572  // from: v_ashrrev_i16_e32 v1, 8, v0
573  // to SDWA src:v0 src_sel:BYTE_1 sext:1
574 
575  // from: v_lshlrev_b16_e32 v1, 8, v0
576  // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
577  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
578  auto Imm = foldToImm(*Src0);
579  if (!Imm || *Imm != 8)
580  break;
581 
582  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
583  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
584 
585  if (Src1->getReg().isPhysical() || Dst->getReg().isPhysical())
586  break;
587 
588  if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
589  Opcode == AMDGPU::V_LSHLREV_B16_e64) {
590  return std::make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
591  } else {
592  return std::make_unique<SDWASrcOperand>(
593  Src1, Dst, BYTE_1, false, false,
594  Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
595  Opcode != AMDGPU::V_LSHRREV_B16_e64);
596  }
597  break;
598  }
599 
600  case AMDGPU::V_BFE_I32_e64:
601  case AMDGPU::V_BFE_U32_e64: {
602  // e.g.:
603  // from: v_bfe_u32 v1, v0, 8, 8
604  // to SDWA src:v0 src_sel:BYTE_1
605 
606  // offset | width | src_sel
607  // ------------------------
608  // 0 | 8 | BYTE_0
609  // 0 | 16 | WORD_0
610  // 0 | 32 | DWORD ?
611  // 8 | 8 | BYTE_1
612  // 16 | 8 | BYTE_2
613  // 16 | 16 | WORD_1
614  // 24 | 8 | BYTE_3
615 
616  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
617  auto Offset = foldToImm(*Src1);
618  if (!Offset)
619  break;
620 
621  MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
622  auto Width = foldToImm(*Src2);
623  if (!Width)
624  break;
625 
626  SdwaSel SrcSel = DWORD;
627 
628  if (*Offset == 0 && *Width == 8)
629  SrcSel = BYTE_0;
630  else if (*Offset == 0 && *Width == 16)
631  SrcSel = WORD_0;
632  else if (*Offset == 0 && *Width == 32)
633  SrcSel = DWORD;
634  else if (*Offset == 8 && *Width == 8)
635  SrcSel = BYTE_1;
636  else if (*Offset == 16 && *Width == 8)
637  SrcSel = BYTE_2;
638  else if (*Offset == 16 && *Width == 16)
639  SrcSel = WORD_1;
640  else if (*Offset == 24 && *Width == 8)
641  SrcSel = BYTE_3;
642  else
643  break;
644 
645  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
646  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
647 
648  if (Src0->getReg().isPhysical() || Dst->getReg().isPhysical())
649  break;
650 
651  return std::make_unique<SDWASrcOperand>(
652  Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32_e64);
653  }
654 
655  case AMDGPU::V_AND_B32_e32:
656  case AMDGPU::V_AND_B32_e64: {
657  // e.g.:
658  // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
659  // to SDWA src:v0 src_sel:WORD_0/BYTE_0
660 
661  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
662  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
663  auto ValSrc = Src1;
664  auto Imm = foldToImm(*Src0);
665 
666  if (!Imm) {
667  Imm = foldToImm(*Src1);
668  ValSrc = Src0;
669  }
670 
671  if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
672  break;
673 
674  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
675 
676  if (ValSrc->getReg().isPhysical() || Dst->getReg().isPhysical())
677  break;
678 
679  return std::make_unique<SDWASrcOperand>(
680  ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
681  }
682 
683  case AMDGPU::V_OR_B32_e32:
684  case AMDGPU::V_OR_B32_e64: {
685  // Patterns for dst_unused:UNUSED_PRESERVE.
686  // e.g., from:
687  // v_add_f16_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD
688  // src1_sel:WORD_1 src2_sel:WORD1
689  // v_add_f16_e32 v3, v1, v2
690  // v_or_b32_e32 v4, v0, v3
691  // to SDWA preserve dst:v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE preserve:v3
692 
693  // Check if one of operands of v_or_b32 is SDWA instruction
695  auto CheckOROperandsForSDWA =
696  [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType {
697  if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg())
698  return CheckRetType(None);
699 
700  MachineOperand *Op1Def = findSingleRegDef(Op1, MRI);
701  if (!Op1Def)
702  return CheckRetType(None);
703 
704  MachineInstr *Op1Inst = Op1Def->getParent();
705  if (!TII->isSDWA(*Op1Inst))
706  return CheckRetType(None);
707 
708  MachineOperand *Op2Def = findSingleRegDef(Op2, MRI);
709  if (!Op2Def)
710  return CheckRetType(None);
711 
712  return CheckRetType(std::make_pair(Op1Def, Op2Def));
713  };
714 
715  MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
716  MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
717  assert(OrSDWA && OrOther);
718  auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
719  if (!Res) {
720  OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
721  OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
722  assert(OrSDWA && OrOther);
723  Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
724  if (!Res)
725  break;
726  }
727 
728  MachineOperand *OrSDWADef = Res->first;
729  MachineOperand *OrOtherDef = Res->second;
730  assert(OrSDWADef && OrOtherDef);
731 
732  MachineInstr *SDWAInst = OrSDWADef->getParent();
733  MachineInstr *OtherInst = OrOtherDef->getParent();
734 
735  // Check that OtherInstr is actually bitwise compatible with SDWAInst = their
736  // destination patterns don't overlap. Compatible instruction can be either
737  // regular instruction with compatible bitness or SDWA instruction with
738  // correct dst_sel
739  // SDWAInst | OtherInst bitness / OtherInst dst_sel
740  // -----------------------------------------------------
741  // DWORD | no / no
742  // WORD_0 | no / BYTE_2/3, WORD_1
743  // WORD_1 | 8/16-bit instructions / BYTE_0/1, WORD_0
744  // BYTE_0 | no / BYTE_1/2/3, WORD_1
745  // BYTE_1 | 8-bit / BYTE_0/2/3, WORD_1
746  // BYTE_2 | 8/16-bit / BYTE_0/1/3. WORD_0
747  // BYTE_3 | 8/16/24-bit / BYTE_0/1/2, WORD_0
748  // E.g. if SDWAInst is v_add_f16_sdwa dst_sel:WORD_1 then v_add_f16 is OK
749  // but v_add_f32 is not.
750 
751  // TODO: add support for non-SDWA instructions as OtherInst.
752  // For now this only works with SDWA instructions. For regular instructions
753  // there is no way to determine if the instruction writes only 8/16/24-bit
754  // out of full register size and all registers are at min 32-bit wide.
755  if (!TII->isSDWA(*OtherInst))
756  break;
757 
758  SdwaSel DstSel = static_cast<SdwaSel>(
759  TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));;
760  SdwaSel OtherDstSel = static_cast<SdwaSel>(
761  TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));
762 
763  bool DstSelAgree = false;
764  switch (DstSel) {
765  case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) ||
766  (OtherDstSel == BYTE_3) ||
767  (OtherDstSel == WORD_1));
768  break;
769  case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
770  (OtherDstSel == BYTE_1) ||
771  (OtherDstSel == WORD_0));
772  break;
773  case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) ||
774  (OtherDstSel == BYTE_2) ||
775  (OtherDstSel == BYTE_3) ||
776  (OtherDstSel == WORD_1));
777  break;
778  case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
779  (OtherDstSel == BYTE_2) ||
780  (OtherDstSel == BYTE_3) ||
781  (OtherDstSel == WORD_1));
782  break;
783  case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) ||
784  (OtherDstSel == BYTE_1) ||
785  (OtherDstSel == BYTE_3) ||
786  (OtherDstSel == WORD_0));
787  break;
788  case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) ||
789  (OtherDstSel == BYTE_1) ||
790  (OtherDstSel == BYTE_2) ||
791  (OtherDstSel == WORD_0));
792  break;
793  default: DstSelAgree = false;
794  }
795 
796  if (!DstSelAgree)
797  break;
798 
799  // Also OtherInst dst_unused should be UNUSED_PAD
800  DstUnused OtherDstUnused = static_cast<DstUnused>(
801  TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused));
802  if (OtherDstUnused != DstUnused::UNUSED_PAD)
803  break;
804 
805  // Create DstPreserveOperand
806  MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
807  assert(OrDst && OrDst->isReg());
808 
809  return std::make_unique<SDWADstPreserveOperand>(
810  OrDst, OrSDWADef, OrOtherDef, DstSel);
811 
812  }
813  }
814 
815  return std::unique_ptr<SDWAOperand>(nullptr);
816 }
817 
818 #if !defined(NDEBUG)
819 static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) {
820  Operand.print(OS);
821  return OS;
822 }
823 #endif
824 
825 void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
826  for (MachineInstr &MI : MBB) {
827  if (auto Operand = matchSDWAOperand(MI)) {
828  LLVM_DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n');
829  SDWAOperands[&MI] = std::move(Operand);
830  ++NumSDWAPatternsFound;
831  }
832  }
833 }
834 
835 // Convert the V_ADDC_U32_e64 into V_ADDC_U32_e32, and
836 // V_ADD_CO_U32_e64 into V_ADD_CO_U32_e32. This allows isConvertibleToSDWA
837 // to perform its transformation on V_ADD_CO_U32_e32 into V_ADD_CO_U32_sdwa.
838 //
839 // We are transforming from a VOP3 into a VOP2 form of the instruction.
840 // %19:vgpr_32 = V_AND_B32_e32 255,
841 // killed %16:vgpr_32, implicit $exec
842 // %47:vgpr_32, %49:sreg_64_xexec = V_ADD_CO_U32_e64
843 // %26.sub0:vreg_64, %19:vgpr_32, implicit $exec
844 // %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64
845 // %26.sub1:vreg_64, %54:vgpr_32, killed %49:sreg_64_xexec, implicit $exec
846 //
847 // becomes
848 // %47:vgpr_32 = V_ADD_CO_U32_sdwa
849 // 0, %26.sub0:vreg_64, 0, killed %16:vgpr_32, 0, 6, 0, 6, 0,
850 // implicit-def $vcc, implicit $exec
851 // %48:vgpr_32 = V_ADDC_U32_e32
852 // 0, %26.sub1:vreg_64, implicit-def $vcc, implicit $vcc, implicit $exec
853 void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
854  const GCNSubtarget &ST) const {
855  int Opc = MI.getOpcode();
856  assert((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64) &&
857  "Currently only handles V_ADD_CO_U32_e64 or V_SUB_CO_U32_e64");
858 
859  // Can the candidate MI be shrunk?
860  if (!TII->canShrink(MI, *MRI))
861  return;
862  Opc = AMDGPU::getVOPe32(Opc);
863  // Find the related ADD instruction.
864  const MachineOperand *Sdst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
865  if (!Sdst)
866  return;
867  MachineOperand *NextOp = findSingleRegUse(Sdst, MRI);
868  if (!NextOp)
869  return;
870  MachineInstr &MISucc = *NextOp->getParent();
871  // Can the successor be shrunk?
872  if (!TII->canShrink(MISucc, *MRI))
873  return;
874  int SuccOpc = AMDGPU::getVOPe32(MISucc.getOpcode());
875  // Make sure the carry in/out are subsequently unused.
876  MachineOperand *CarryIn = TII->getNamedOperand(MISucc, AMDGPU::OpName::src2);
877  if (!CarryIn)
878  return;
879  MachineOperand *CarryOut = TII->getNamedOperand(MISucc, AMDGPU::OpName::sdst);
880  if (!CarryOut)
881  return;
882  if (!MRI->hasOneUse(CarryIn->getReg()) || !MRI->use_empty(CarryOut->getReg()))
883  return;
884  // Make sure VCC or its subregs are dead before MI.
885  MachineBasicBlock &MBB = *MI.getParent();
886  auto Liveness = MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
887  if (Liveness != MachineBasicBlock::LQR_Dead)
888  return;
889  // Check if VCC is referenced in range of (MI,MISucc].
890  for (auto I = std::next(MI.getIterator()), E = MISucc.getIterator();
891  I != E; ++I) {
892  if (I->modifiesRegister(AMDGPU::VCC, TRI))
893  return;
894  }
895 
896  // Make the two new e32 instruction variants.
897  // Replace MI with V_{SUB|ADD}_I32_e32
898  BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc))
899  .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
900  .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
901  .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
902  .setMIFlags(MI.getFlags());
903 
904  MI.eraseFromParent();
905 
906  // Replace MISucc with V_{SUBB|ADDC}_U32_e32
907  BuildMI(MBB, MISucc, MISucc.getDebugLoc(), TII->get(SuccOpc))
908  .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::vdst))
909  .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src0))
910  .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src1))
911  .setMIFlags(MISucc.getFlags());
912 
913  MISucc.eraseFromParent();
914 }
915 
916 bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
917  const GCNSubtarget &ST) const {
918  // Check if this is already an SDWA instruction
919  unsigned Opc = MI.getOpcode();
920  if (TII->isSDWA(Opc))
921  return true;
922 
923  // Check if this instruction has opcode that supports SDWA
924  if (AMDGPU::getSDWAOp(Opc) == -1)
925  Opc = AMDGPU::getVOPe32(Opc);
926 
927  if (AMDGPU::getSDWAOp(Opc) == -1)
928  return false;
929 
930  if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
931  return false;
932 
933  if (TII->isVOPC(Opc)) {
934  if (!ST.hasSDWASdst()) {
935  const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
936  if (SDst && (SDst->getReg() != AMDGPU::VCC &&
937  SDst->getReg() != AMDGPU::VCC_LO))
938  return false;
939  }
940 
941  if (!ST.hasSDWAOutModsVOPC() &&
942  (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
943  TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
944  return false;
945 
946  } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
947  !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
948  return false;
949  }
950 
951  if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
952  Opc == AMDGPU::V_FMAC_F32_e32 ||
953  Opc == AMDGPU::V_MAC_F16_e32 ||
954  Opc == AMDGPU::V_MAC_F32_e32))
955  return false;
956 
957  // Check if target supports this SDWA opcode
958  if (TII->pseudoToMCOpcode(Opc) == -1)
959  return false;
960 
961  // FIXME: has SDWA but require handling of implicit VCC use
962  if (Opc == AMDGPU::V_CNDMASK_B32_e32)
963  return false;
964 
965  if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
966  if (!Src0->isReg() && !Src0->isImm())
967  return false;
968  }
969 
970  if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) {
971  if (!Src1->isReg() && !Src1->isImm())
972  return false;
973  }
974 
975  return true;
976 }
977 
978 bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
979  const SDWAOperandsVector &SDWAOperands) {
980 
981  LLVM_DEBUG(dbgs() << "Convert instruction:" << MI);
982 
983  // Convert to sdwa
984  int SDWAOpcode;
985  unsigned Opcode = MI.getOpcode();
986  if (TII->isSDWA(Opcode)) {
987  SDWAOpcode = Opcode;
988  } else {
989  SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
990  if (SDWAOpcode == -1)
991  SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
992  }
993  assert(SDWAOpcode != -1);
994 
995  const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
996 
997  // Create SDWA version of instruction MI and initialize its operands
998  MachineInstrBuilder SDWAInst =
999  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc)
1000  .setMIFlags(MI.getFlags());
1001 
1002  // Copy dst, if it is present in original then should also be present in SDWA
1003  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
1004  if (Dst) {
1005  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
1006  SDWAInst.add(*Dst);
1007  } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
1008  assert(Dst &&
1009  AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
1010  SDWAInst.add(*Dst);
1011  } else {
1012  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
1013  SDWAInst.addReg(TRI->getVCC(), RegState::Define);
1014  }
1015 
1016  // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
1017  // src0_modifiers (except for v_nop_sdwa, but it can't get here)
1018  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1019  assert(
1020  Src0 &&
1021  AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
1022  AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
1023  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
1024  SDWAInst.addImm(Mod->getImm());
1025  else
1026  SDWAInst.addImm(0);
1027  SDWAInst.add(*Src0);
1028 
1029  // Copy src1 if present, initialize src1_modifiers.
1030  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1031  if (Src1) {
1032  assert(
1033  AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
1034  AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
1035  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
1036  SDWAInst.addImm(Mod->getImm());
1037  else
1038  SDWAInst.addImm(0);
1039  SDWAInst.add(*Src1);
1040  }
1041 
1042  if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
1043  SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
1044  SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
1045  SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
1046  // v_mac_f16/32 has additional src2 operand tied to vdst
1047  MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1048  assert(Src2);
1049  SDWAInst.add(*Src2);
1050  }
1051 
1052  // Copy clamp if present, initialize otherwise
1053  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
1054  MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
1055  if (Clamp) {
1056  SDWAInst.add(*Clamp);
1057  } else {
1058  SDWAInst.addImm(0);
1059  }
1060 
1061  // Copy omod if present, initialize otherwise if needed
1062  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
1063  MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
1064  if (OMod) {
1065  SDWAInst.add(*OMod);
1066  } else {
1067  SDWAInst.addImm(0);
1068  }
1069  }
1070 
1071  // Copy dst_sel if present, initialize otherwise if needed
1072  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
1073  MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
1074  if (DstSel) {
1075  SDWAInst.add(*DstSel);
1076  } else {
1078  }
1079  }
1080 
1081  // Copy dst_unused if present, initialize otherwise if needed
1082  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
1083  MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
1084  if (DstUnused) {
1085  SDWAInst.add(*DstUnused);
1086  } else {
1088  }
1089  }
1090 
1091  // Copy src0_sel if present, initialize otherwise
1092  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1);
1093  MachineOperand *Src0Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
1094  if (Src0Sel) {
1095  SDWAInst.add(*Src0Sel);
1096  } else {
1098  }
1099 
1100  // Copy src1_sel if present, initialize otherwise if needed
1101  if (Src1) {
1102  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1);
1103  MachineOperand *Src1Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
1104  if (Src1Sel) {
1105  SDWAInst.add(*Src1Sel);
1106  } else {
1108  }
1109  }
1110 
1111  // Check for a preserved register that needs to be copied.
1112  auto DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
1113  if (DstUnused &&
1115  // We expect, if we are here, that the instruction was already in it's SDWA form,
1116  // with a tied operand.
1117  assert(Dst && Dst->isTied());
1118  assert(Opcode == static_cast<unsigned int>(SDWAOpcode));
1119  // We also expect a vdst, since sdst can't preserve.
1120  auto PreserveDstIdx = AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst);
1121  assert(PreserveDstIdx != -1);
1122 
1123  auto TiedIdx = MI.findTiedOperandIdx(PreserveDstIdx);
1124  auto Tied = MI.getOperand(TiedIdx);
1125 
1126  SDWAInst.add(Tied);
1127  SDWAInst->tieOperands(PreserveDstIdx, SDWAInst->getNumOperands() - 1);
1128  }
1129 
1130  // Apply all sdwa operand patterns.
1131  bool Converted = false;
1132  for (auto &Operand : SDWAOperands) {
1133  LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand);
1134  // There should be no intesection between SDWA operands and potential MIs
1135  // e.g.:
1136  // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
1137  // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0
1138  // v_add_u32 v3, v4, v2
1139  //
1140  // In that example it is possible that we would fold 2nd instruction into 3rd
1141  // (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that was
1142  // already destroyed). So if SDWAOperand is also a potential MI then do not
1143  // apply it.
1144  if (PotentialMatches.count(Operand->getParentInst()) == 0)
1145  Converted |= Operand->convertToSDWA(*SDWAInst, TII);
1146  }
1147  if (Converted) {
1148  ConvertedInstructions.push_back(SDWAInst);
1149  } else {
1150  SDWAInst->eraseFromParent();
1151  return false;
1152  }
1153 
1154  LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n');
1155  ++NumSDWAInstructionsPeepholed;
1156 
1157  MI.eraseFromParent();
1158  return true;
1159 }
1160 
1161 // If an instruction was converted to SDWA it should not have immediates or SGPR
1162 // operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs.
1163 void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
1164  const GCNSubtarget &ST) const {
1165  const MCInstrDesc &Desc = TII->get(MI.getOpcode());
1166  unsigned ConstantBusCount = 0;
1167  for (MachineOperand &Op : MI.explicit_uses()) {
1168  if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
1169  continue;
1170 
1171  unsigned I = MI.getOperandNo(&Op);
1172  if (Desc.OpInfo[I].RegClass == -1 ||
1173  !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
1174  continue;
1175 
1176  if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
1177  TRI->isSGPRReg(*MRI, Op.getReg())) {
1178  ++ConstantBusCount;
1179  continue;
1180  }
1181 
1182  Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1183  auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
1184  TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
1185  if (Op.isImm())
1186  Copy.addImm(Op.getImm());
1187  else if (Op.isReg())
1188  Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
1189  Op.getSubReg());
1190  Op.ChangeToRegister(VGPR, false);
1191  }
1192 }
1193 
1194 bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
1195  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1196 
1197  if (!ST.hasSDWA() || skipFunction(MF.getFunction()))
1198  return false;
1199 
1200  MRI = &MF.getRegInfo();
1201  TRI = ST.getRegisterInfo();
1202  TII = ST.getInstrInfo();
1203 
1204  // Find all SDWA operands in MF.
1205  bool Ret = false;
1206  for (MachineBasicBlock &MBB : MF) {
1207  bool Changed = false;
1208  do {
1209  // Preprocess the ADD/SUB pairs so they could be SDWA'ed.
1210  // Look for a possible ADD or SUB that resulted from a previously lowered
1211  // V_{ADD|SUB}_U64_PSEUDO. The function pseudoOpConvertToVOP2
1212  // lowers the pair of instructions into e32 form.
1213  matchSDWAOperands(MBB);
1214  for (const auto &OperandPair : SDWAOperands) {
1215  const auto &Operand = OperandPair.second;
1216  MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
1217  if (PotentialMI &&
1218  (PotentialMI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
1219  PotentialMI->getOpcode() == AMDGPU::V_SUB_CO_U32_e64))
1220  pseudoOpConvertToVOP2(*PotentialMI, ST);
1221  }
1222  SDWAOperands.clear();
1223 
1224  // Generate potential match list.
1225  matchSDWAOperands(MBB);
1226 
1227  for (const auto &OperandPair : SDWAOperands) {
1228  const auto &Operand = OperandPair.second;
1229  MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
1230  if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) {
1231  PotentialMatches[PotentialMI].push_back(Operand.get());
1232  }
1233  }
1234 
1235  for (auto &PotentialPair : PotentialMatches) {
1236  MachineInstr &PotentialMI = *PotentialPair.first;
1237  convertToSDWA(PotentialMI, PotentialPair.second);
1238  }
1239 
1240  PotentialMatches.clear();
1241  SDWAOperands.clear();
1242 
1243  Changed = !ConvertedInstructions.empty();
1244 
1245  if (Changed)
1246  Ret = true;
1247  while (!ConvertedInstructions.empty())
1248  legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
1249  } while (Changed);
1250  }
1251 
1252  return Ret;
1253 }
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:499
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:132
llvm
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::SISrcMods::SEXT
@ SEXT
Definition: SIDefines.h:201
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::AMDGPU::SDWA::BYTE_1
@ BYTE_1
Definition: SIDefines.h:641
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:225
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:124
llvm::MachineOperand::setIsKill
void setIsKill(bool Val=true)
Definition: MachineOperand.h:497
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::MachineOperand::isTied
bool isTied() const
Definition: MachineOperand.h:438
MapVector.h
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:411
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::MachineInstr::defs
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:644
llvm::MachineRegisterInfo::use_nodbg_instructions
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:543
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:652
llvm::AMDGPU::SDWA::BYTE_0
@ BYTE_0
Definition: SIDefines.h:640
llvm::initializeSIPeepholeSDWAPass
void initializeSIPeepholeSDWAPass(PassRegistry &)
llvm::AMDGPU::SDWA::UNUSED_PRESERVE
@ UNUSED_PRESERVE
Definition: SIDefines.h:652
isSameReg
static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS)
Definition: SIPeepholeSDWA.cpp:257
llvm::AMDGPU::Exp::Target
Target
Definition: SIDefines.h:732
llvm::Optional< int64_t >
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SIPeepholeSDWA.cpp:31
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::AMDGPU::getNamedOperandIdx
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
llvm::AMDGPU::SDWA::DstUnused
DstUnused
Definition: SIDefines.h:649
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::AMDGPU::SDWA::UNUSED_SEXT
@ UNUSED_SEXT
Definition: SIDefines.h:651
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::MachineInstr::getFlags
uint16_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:325
llvm::Register::isPhysical
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:565
llvm::MachineRegisterInfo::use_nodbg_operands
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:526
llvm::AMDGPU::SDWA::UNUSED_PAD
@ UNUSED_PAD
Definition: SIDefines.h:650
llvm::MachineBasicBlock::remove
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
Definition: MachineBasicBlock.h:839
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:534
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:367
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:488
llvm::MachineOperand::setSubReg
void setSubReg(unsigned subReg)
Definition: MachineOperand.h:468
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::AMDGPU::SDWA::BYTE_2
@ BYTE_2
Definition: SIDefines.h:642
llvm::AMDGPU::getVOPe32
LLVM_READONLY int getVOPe32(uint16_t Opcode)
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:196
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::createSIPeepholeSDWAPass
FunctionPass * createSIPeepholeSDWAPass()
Definition: SIPeepholeSDWA.cpp:193
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::RegState::Define
@ Define
Register definition.
Definition: MachineInstrBuilder.h:45
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:29
llvm::MCOperandInfo::RegClass
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:90
llvm::AMDGPU::getSDWAOp
LLVM_READONLY int getSDWAOp(uint16_t Opcode)
llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition: MachineOperand.h:235
llvm::None
const NoneType None
Definition: None.h:23
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:49
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:506
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:555
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
AMDGPUMCTargetDesc.h
llvm::MachineOperand::setIsDead
void setIsDead(bool Val=true)
Definition: MachineOperand.h:503
llvm::TargetRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
Definition: TargetRegisterInfo.h:723
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:318
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:70
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::SIPeepholeSDWAID
char & SIPeepholeSDWAID
Definition: SIPeepholeSDWA.cpp:191
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::codeview::FrameCookieKind::Copy
@ Copy
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::AMDGPU::SDWA::DWORD
@ DWORD
Definition: SIDefines.h:646
MachineFunctionPass.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
copyRegOperand
static void copyRegOperand(MachineOperand &To, const MachineOperand &From)
Definition: SIPeepholeSDWA.cpp:245
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::MCInstrDesc::OpInfo
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:207
findSingleRegUse
static MachineOperand * findSingleRegUse(const MachineOperand *Reg, const MachineRegisterInfo *MRI)
Definition: SIPeepholeSDWA.cpp:264
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:98
llvm::AMDGPU::SDWA::SdwaSel
SdwaSel
Definition: SIDefines.h:639
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:357
llvm::MachineBasicBlock::LQR_Dead
@ LQR_Dead
Register is known to be fully dead.
Definition: MachineBasicBlock.h:928
llvm::MachineFunction
Definition: MachineFunction.h:227
llvm::RegState::ImplicitKill
@ ImplicitKill
Definition: MachineInstrBuilder.h:65
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
AMDGPU.h
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:478
llvm::MachineOperand::setIsUndef
void setIsUndef(bool Val=true)
Definition: MachineOperand.h:508
llvm::ModRefInfo::Mod
@ Mod
The access may modify the value stored in memory.
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::SISrcMods::ABS
@ ABS
Definition: SIDefines.h:200
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:362
llvm::SISrcMods::NEG
@ NEG
Definition: SIDefines.h:199
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineRegisterInfo::def_operands
iterator_range< def_iterator > def_operands(Register Reg) const
Definition: MachineRegisterInfo.h:389
llvm::AMDGPU::SDWA::WORD_1
@ WORD_1
Definition: SIDefines.h:645
llvm::MapVector::count
size_type count(const KeyT &Key) const
Definition: MapVector.h:142
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:521
SDWA
@ SDWA
Definition: SIInstrInfo.cpp:7344
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1335
llvm::MachineBasicBlock::computeRegisterLiveness
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
Definition: MachineBasicBlock.cpp:1508
findSingleRegDef
static MachineOperand * findSingleRegDef(const MachineOperand *Reg, const MachineRegisterInfo *MRI)
Definition: SIPeepholeSDWA.cpp:286
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:320
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::AMDGPU::SDWA::WORD_0
@ WORD_0
Definition: SIDefines.h:644
llvm::MachineRegisterInfo::hasOneUse
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
Definition: MachineRegisterInfo.h:510
llvm::MachineInstrBuilder::setMIFlags
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Definition: MachineInstrBuilder.h:274
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:329
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:481
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
From
BlockVerifier::State From
Definition: BlockVerifier.cpp:55
llvm::AMDGPU::SDWA::BYTE_3
@ BYTE_3
Definition: SIDefines.h:643
llvm::MachineInstr::tieOperands
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
Definition: MachineInstr.cpp:1099
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:677
llvm::SrcOp
Definition: MachineIRBuilder.h:119
getReg
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
Definition: MipsDisassembler.cpp:580
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38