LLVM 10.0.0svn
SIFoldOperands.cpp
1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
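/// This pass folds immediates, frame indexes, and register copies into the
/// instructions that use them, folds clamp and output-modifier (omod)
/// patterns into the defining instructions, and constant-folds simple ALU
/// operations that become foldable after instruction selection.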
8 //===----------------------------------------------------------------------===//
9 //
10 
11 #include "AMDGPU.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIInstrInfo.h"
14 #include "SIMachineFunctionInfo.h"
15 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16 #include "llvm/ADT/DepthFirstIterator.h"
17 #include "llvm/CodeGen/LiveIntervals.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Target/TargetMachine.h"
24 
25 #define DEBUG_TYPE "si-fold-operands"
26 using namespace llvm;
27 
28 namespace {
29 
30 struct FoldCandidate {
31  MachineInstr *UseMI;
32  union {
33  MachineOperand *OpToFold;
34  uint64_t ImmToFold;
35  int FrameIndexToFold;
36  };
37  int ShrinkOpcode;
38  unsigned char UseOpNo;
39  MachineOperand::MachineOperandType Kind;
40  bool Commuted;
41 
42  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
43  bool Commuted_ = false,
44  int ShrinkOp = -1) :
45  UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
46  Kind(FoldOp->getType()),
47  Commuted(Commuted_) {
48  if (FoldOp->isImm()) {
49  ImmToFold = FoldOp->getImm();
50  } else if (FoldOp->isFI()) {
51  FrameIndexToFold = FoldOp->getIndex();
52  } else {
53  assert(FoldOp->isReg() || FoldOp->isGlobal());
54  OpToFold = FoldOp;
55  }
56  }
57 
58  bool isFI() const {
59  return Kind == MachineOperand::MO_FrameIndex;
60  }
61 
62  bool isImm() const {
63  return Kind == MachineOperand::MO_Immediate;
64  }
65 
66  bool isReg() const {
67  return Kind == MachineOperand::MO_Register;
68  }
69 
70  bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
71 
72  bool isCommuted() const {
73  return Commuted;
74  }
75 
76  bool needsShrink() const {
77  return ShrinkOpcode != -1;
78  }
79 
80  int getShrinkOpcode() const {
81  return ShrinkOpcode;
82  }
83 };
84 
85 class SIFoldOperands : public MachineFunctionPass {
86 public:
87  static char ID;
88  MachineRegisterInfo *MRI;
89  const SIInstrInfo *TII;
90  const SIRegisterInfo *TRI;
91  const GCNSubtarget *ST;
92  const SIMachineFunctionInfo *MFI;
93 
94  void foldOperand(MachineOperand &OpToFold,
95  MachineInstr *UseMI,
96  int UseOpIdx,
97  SmallVectorImpl<FoldCandidate> &FoldList,
98  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
99 
100  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
101 
102  const MachineOperand *isClamp(const MachineInstr &MI) const;
103  bool tryFoldClamp(MachineInstr &MI);
104 
105  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
106  bool tryFoldOMod(MachineInstr &MI);
107 
108 public:
109  SIFoldOperands() : MachineFunctionPass(ID) {
110  initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
111  }
112 
113  bool runOnMachineFunction(MachineFunction &MF) override;
114 
115  StringRef getPassName() const override { return "SI Fold Operands"; }
116 
117  void getAnalysisUsage(AnalysisUsage &AU) const override {
118  AU.setPreservesCFG();
119  MachineFunctionPass::getAnalysisUsage(AU);
120  }
121 };
122 
123 } // End anonymous namespace.
124 
125 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
126  "SI Fold Operands", false, false)
127 
128 char SIFoldOperands::ID = 0;
129 
130 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
131 
132 // Wrapper around isInlineConstant that understands special cases when
133 // instruction types are replaced during operand folding.
134 static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
135  const MachineInstr &UseMI,
136  unsigned OpNo,
137  const MachineOperand &OpToFold) {
138  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
139  return true;
140 
141  unsigned Opc = UseMI.getOpcode();
142  switch (Opc) {
143  case AMDGPU::V_MAC_F32_e64:
144  case AMDGPU::V_MAC_F16_e64:
145  case AMDGPU::V_FMAC_F32_e64:
146  case AMDGPU::V_FMAC_F16_e64: {
147  // Special case for mac. Since this is replaced with mad when folded into
148  // src2, we need to check the legality for the final instruction.
149  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
150  if (static_cast<int>(OpNo) == Src2Idx) {
151  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 ||
152  Opc == AMDGPU::V_FMAC_F16_e64;
153  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 ||
154  Opc == AMDGPU::V_FMAC_F32_e64;
155 
156  unsigned Opc = IsFMA ?
157  (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) :
158  (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
159  const MCInstrDesc &MadDesc = TII->get(Opc);
160  return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
161  }
162  return false;
163  }
164  default:
165  return false;
166  }
167 }
168 
169 // TODO: Add heuristic that the frame index might not fit in the addressing mode
170 // immediate offset to avoid materializing in loops.
171 static bool frameIndexMayFold(const SIInstrInfo *TII,
172  const MachineInstr &UseMI,
173  int OpNo,
174  const MachineOperand &OpToFold) {
175  return OpToFold.isFI() &&
176  (TII->isMUBUF(UseMI) || TII->isFLATScratch(UseMI)) &&
177  OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::vaddr);
178 }
179 
180 FunctionPass *llvm::createSIFoldOperandsPass() {
181  return new SIFoldOperands();
182 }
183 
184 static bool updateOperand(FoldCandidate &Fold,
185  const SIInstrInfo &TII,
186  const TargetRegisterInfo &TRI,
187  const GCNSubtarget &ST) {
188  MachineInstr *MI = Fold.UseMI;
189  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
190  assert(Old.isReg());
191 
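 // For packed (v2f16/v2i16) operands, a 32-bit immediate whose low 16 bits
 // form an inline constant can still be folded: the op_sel/op_sel_hi source
 // modifiers are adjusted below so the operand reads the meaningful half.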
192  if (Fold.isImm()) {
193  if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
194  !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) &&
195  AMDGPU::isInlinableLiteralV216(static_cast<uint16_t>(Fold.ImmToFold),
196  ST.hasInv2PiInlineImm())) {
197  // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
198  // already set.
199  unsigned Opcode = MI->getOpcode();
200  int OpNo = MI->getOperandNo(&Old);
201  int ModIdx = -1;
202  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
203  ModIdx = AMDGPU::OpName::src0_modifiers;
204  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
205  ModIdx = AMDGPU::OpName::src1_modifiers;
206  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
207  ModIdx = AMDGPU::OpName::src2_modifiers;
208  assert(ModIdx != -1);
209  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
210  MachineOperand &Mod = MI->getOperand(ModIdx);
211  unsigned Val = Mod.getImm();
212  if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
213  return false;
214  // Only apply the following transformation if that operand requires
215  // a packed immediate.
216  switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
217  case AMDGPU::OPERAND_REG_IMM_V2FP16:
218  case AMDGPU::OPERAND_REG_IMM_V2INT16:
219  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
220  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
221  // If upper part is all zero we do not need op_sel_hi.
222  if (!isUInt<16>(Fold.ImmToFold)) {
223  if (!(Fold.ImmToFold & 0xffff)) {
224  Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
225  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
226  Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
227  return true;
228  }
229  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
230  Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
231  return true;
232  }
233  break;
234  default:
235  break;
236  }
237  }
238  }
239 
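 // This fold requires shrinking a VOP3 add/sub to its VOP2 (e32) form so the
 // immediate can be encoded as a literal. The e32 encoding implicitly writes
 // the carry out to VCC, so the shrink is only legal when VCC is dead here.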
240  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
241  MachineBasicBlock *MBB = MI->getParent();
242  auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
243  if (Liveness != MachineBasicBlock::LQR_Dead)
244  return false;
245 
246  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
247  int Op32 = Fold.getShrinkOpcode();
248  MachineOperand &Dst0 = MI->getOperand(0);
249  MachineOperand &Dst1 = MI->getOperand(1);
250  assert(Dst0.isDef() && Dst1.isDef());
251 
252  bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
253 
254  const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
255  Register NewReg0 = MRI.createVirtualRegister(Dst0RC);
256 
257  MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
258 
259  if (HaveNonDbgCarryUse) {
260  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
261  .addReg(AMDGPU::VCC, RegState::Kill);
262  }
263 
264  // Keep the old instruction around to avoid breaking iterators, but
265  // replace it with a dummy instruction to remove uses.
266  //
267  // FIXME: We should not invert how this pass looks at operands to avoid
268  // this. Should track set of foldable movs instead of looking for uses
269  // when looking at a use.
270  Dst0.setReg(NewReg0);
271  for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
272  MI->RemoveOperand(I);
273  MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
274 
275  if (Fold.isCommuted())
276  TII.commuteInstruction(*Inst32, false);
277  return true;
278  }
279 
280  assert(!Fold.needsShrink() && "not handled");
281 
282  if (Fold.isImm()) {
283  Old.ChangeToImmediate(Fold.ImmToFold);
284  return true;
285  }
286 
287  if (Fold.isGlobal()) {
288  Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
289  Fold.OpToFold->getTargetFlags());
290  return true;
291  }
292 
293  if (Fold.isFI()) {
294  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
295  return true;
296  }
297 
298  MachineOperand *New = Fold.OpToFold;
299  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
300  Old.setIsUndef(New->isUndef());
301  return true;
302 }
303 
304 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
305  const MachineInstr *MI) {
306  for (auto Candidate : FoldList) {
307  if (Candidate.UseMI == MI)
308  return true;
309  }
310  return false;
311 }
312 
313 static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
314  MachineInstr *MI, unsigned OpNo,
315  MachineOperand *OpToFold,
316  const SIInstrInfo *TII) {
317  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
318  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
319  unsigned Opc = MI->getOpcode();
320  if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
321  Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
322  (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
323  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 ||
324  Opc == AMDGPU::V_FMAC_F16_e64;
325  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 ||
326  Opc == AMDGPU::V_FMAC_F32_e64;
327  unsigned NewOpc = IsFMA ?
328  (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) :
329  (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
330 
331  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
332  // to fold the operand.
333  MI->setDesc(TII->get(NewOpc));
334  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
335  if (FoldAsMAD) {
336  MI->untieRegOperand(OpNo);
337  return true;
338  }
339  MI->setDesc(TII->get(Opc));
340  }
341 
342  // Special case for s_setreg_b32
343  if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
344  MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
345  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
346  return true;
347  }
348 
349  // If we are already folding into another operand of MI, then
350  // we can't commute the instruction, otherwise we risk making the
351  // other fold illegal.
352  if (isUseMIInFoldList(FoldList, MI))
353  return false;
354 
355  unsigned CommuteOpNo = OpNo;
356 
357  // Operand is not legal, so try to commute the instruction to
358  // see if this makes it possible to fold.
359  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
360  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
361  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
362 
363  if (CanCommute) {
364  if (CommuteIdx0 == OpNo)
365  CommuteOpNo = CommuteIdx1;
366  else if (CommuteIdx1 == OpNo)
367  CommuteOpNo = CommuteIdx0;
368  }
369 
370 
371  // One of the operands might be an Imm operand, and OpNo may refer to it after
372  // the call of commuteInstruction() below. Such situations are avoided
373  // here explicitly as OpNo must be a register operand to be a candidate
374  // for memory folding.
375  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
376  !MI->getOperand(CommuteIdx1).isReg()))
377  return false;
378 
379  if (!CanCommute ||
380  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
381  return false;
382 
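 // If the operand is still illegal after commuting, the remaining option is
 // the 32-bit add/sub family: their e64 (VOP3) encoding cannot take this
 // literal-like operand, but the e32 form can, so record the shrunk opcode
 // and let updateOperand() rewrite the instruction.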
383  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
384  if ((Opc == AMDGPU::V_ADD_I32_e64 ||
385  Opc == AMDGPU::V_SUB_I32_e64 ||
386  Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
387  (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
388  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
389 
390  // Verify the other operand is a VGPR, otherwise we would violate the
391  // constant bus restriction.
392  unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
393  MachineOperand &OtherOp = MI->getOperand(OtherIdx);
394  if (!OtherOp.isReg() ||
395  !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
396  return false;
397 
398  assert(MI->getOperand(1).isDef());
399 
400  // Make sure to get the 32-bit version of the commuted opcode.
401  unsigned MaybeCommutedOpc = MI->getOpcode();
402  int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
403 
404  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
405  Op32));
406  return true;
407  }
408 
409  TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
410  return false;
411  }
412 
413  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
414  return true;
415  }
416 
417  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
418  return true;
419 }
420 
421 // If the use operand doesn't care about the value, this may be an operand only
422 // used for register indexing, in which case it is unsafe to fold.
423 static bool isUseSafeToFold(const SIInstrInfo *TII,
424  const MachineInstr &MI,
425  const MachineOperand &UseMO) {
426  return !UseMO.isUndef() && !TII->isSDWA(MI);
427  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
428 }
429 
430 static bool tryToFoldACImm(const SIInstrInfo *TII,
431  const MachineOperand &OpToFold,
432  MachineInstr *UseMI,
433  unsigned UseOpIdx,
434  SmallVectorImpl<FoldCandidate> &FoldList) {
435  const MCInstrDesc &Desc = UseMI->getDesc();
436  const MCOperandInfo *OpInfo = Desc.OpInfo;
437  if (!OpInfo || UseOpIdx >= Desc.getNumOperands())
438  return false;
439 
440  uint8_t OpTy = OpInfo[UseOpIdx].OperandType;
441  if (OpTy < AMDGPU::OPERAND_REG_INLINE_AC_FIRST ||
442  OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST)
443  return false;
444 
445  if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
446  TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
447  UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
448  return true;
449  }
450 
451  if (!OpToFold.isReg())
452  return false;
453 
454  Register UseReg = OpToFold.getReg();
455  if (!Register::isVirtualRegister(UseReg))
456  return false;
457 
458  if (llvm::find_if(FoldList, [UseMI](const FoldCandidate &FC) {
459  return FC.UseMI == UseMI; }) != FoldList.end())
460  return false;
461 
462  MachineRegisterInfo &MRI = UseMI->getParent()->getParent()->getRegInfo();
463  const MachineInstr *Def = MRI.getUniqueVRegDef(UseReg);
464  if (!Def || !Def->isRegSequence())
465  return false;
466 
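 // The operand is fed by a REG_SEQUENCE. Folding is still possible if every
 // element of the sequence is the same inline constant (a splat), looking
 // through foldable copies to find the materialized immediates.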
467  int64_t Imm;
468  MachineOperand *Op = nullptr;
469  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
470  const MachineOperand &Sub = Def->getOperand(I);
471  if (!Sub.isReg() || Sub.getSubReg())
472  return false;
473  MachineInstr *SubDef = MRI.getUniqueVRegDef(Sub.getReg());
474  while (SubDef && !SubDef->isMoveImmediate() &&
475  !SubDef->getOperand(1).isImm() && TII->isFoldableCopy(*SubDef))
476  SubDef = MRI.getUniqueVRegDef(SubDef->getOperand(1).getReg());
477  if (!SubDef || !SubDef->isMoveImmediate() || !SubDef->getOperand(1).isImm())
478  return false;
479  Op = &SubDef->getOperand(1);
480  auto SubImm = Op->getImm();
481  if (I == 1) {
482  if (!TII->isInlineConstant(SubDef->getOperand(1), OpTy))
483  return false;
484 
485  Imm = SubImm;
486  continue;
487  }
488  if (Imm != SubImm)
489  return false; // Can only fold splat constants
490  }
491 
492  if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op))
493  return false;
494 
495  FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
496  return true;
497 }
498 
499 void SIFoldOperands::foldOperand(
500  MachineOperand &OpToFold,
501  MachineInstr *UseMI,
502  int UseOpIdx,
503  SmallVectorImpl<FoldCandidate> &FoldList,
504  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
505  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
506 
507  if (!isUseSafeToFold(TII, *UseMI, UseOp))
508  return;
509 
510  // FIXME: Fold operands with subregs.
511  if (UseOp.isReg() && OpToFold.isReg()) {
512  if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
513  return;
514 
515  // Don't fold subregister extracts into tied operands; only fold a full
516  // copy, since a subregister use tied to a full register def doesn't really
517  // make sense. e.g. don't fold:
518  //
519  // %1 = COPY %0:sub1
520  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
521  //
522  // into
523  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
524  if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
525  return;
526  }
527 
528  // Special case for REG_SEQUENCE: We can't fold literals into
529  // REG_SEQUENCE instructions, so we have to fold them into the
530  // uses of REG_SEQUENCE.
531  if (UseMI->isRegSequence()) {
532  Register RegSeqDstReg = UseMI->getOperand(0).getReg();
533  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
534 
535  MachineRegisterInfo::use_iterator Next;
536  for (MachineRegisterInfo::use_iterator
537  RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
538  RSUse != RSE; RSUse = Next) {
539  Next = std::next(RSUse);
540 
541  MachineInstr *RSUseMI = RSUse->getParent();
542 
543  if (tryToFoldACImm(TII, UseMI->getOperand(0), RSUseMI,
544  RSUse.getOperandNo(), FoldList))
545  continue;
546 
547  if (RSUse->getSubReg() != RegSeqDstSubReg)
548  continue;
549 
550  foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
551  CopiesToReplace);
552  }
553 
554  return;
555  }
556 
557  if (tryToFoldACImm(TII, OpToFold, UseMI, UseOpIdx, FoldList))
558  return;
559 
560  if (frameIndexMayFold(TII, *UseMI, UseOpIdx, OpToFold)) {
561  // Sanity check that this is a stack access.
562  // FIXME: Should probably use stack pseudos before frame lowering.
563  MachineOperand *SOff = TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
564  if (!SOff->isReg() || (SOff->getReg() != MFI->getScratchWaveOffsetReg() &&
565  SOff->getReg() != MFI->getStackPtrOffsetReg()))
566  return;
567 
568  if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
569  MFI->getScratchRSrcReg())
570  return;
571 
572  // A frame index will resolve to a positive constant, so it should always be
573  // safe to fold the addressing mode, even pre-GFX9.
574  UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
575  SOff->setReg(MFI->getStackPtrOffsetReg());
576  return;
577  }
578 
579  bool FoldingImmLike =
580  OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
581 
582  if (FoldingImmLike && UseMI->isCopy()) {
583  Register DestReg = UseMI->getOperand(0).getReg();
584 
585  // Don't fold into a copy to a physical register. Doing so would interfere
586  // with the register coalescer's logic which would avoid redundant
587  // initializations.
588  if (DestReg.isPhysical())
589  return;
590 
591  const TargetRegisterClass *DestRC = MRI->getRegClass(DestReg);
592 
593  Register SrcReg = UseMI->getOperand(1).getReg();
594  if (SrcReg.isVirtual()) { // XXX - This can be an assert?
595  const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
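 // For an SGPR-to-vector-register copy, queue folds of the copy's SGPR
 // source into every use of the copy's destination rather than folding into
 // the copy itself.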
596  if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
597  MachineRegisterInfo::use_iterator NextUse;
598  SmallVector<FoldCandidate, 4> CopyUses;
599  for (MachineRegisterInfo::use_iterator
600  Use = MRI->use_begin(DestReg), E = MRI->use_end();
601  Use != E; Use = NextUse) {
602  NextUse = std::next(Use);
603  FoldCandidate FC = FoldCandidate(Use->getParent(),
604  Use.getOperandNo(), &UseMI->getOperand(1));
605  CopyUses.push_back(FC);
606  }
607  for (auto & F : CopyUses) {
608  foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo,
609  FoldList, CopiesToReplace);
610  }
611  }
612  }
613 
614  if (DestRC == &AMDGPU::AGPR_32RegClass &&
615  TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
616  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
617  UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
618  CopiesToReplace.push_back(UseMI);
619  return;
620  }
621 
622  // In order to fold immediates into copies, we need to change the
623  // copy to a MOV.
624 
625  unsigned MovOp = TII->getMovOpcode(DestRC);
626  if (MovOp == AMDGPU::COPY)
627  return;
628 
629  UseMI->setDesc(TII->get(MovOp));
630  MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
631  MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
632  while (ImpOpI != ImpOpE) {
633  MachineInstr::mop_iterator Tmp = ImpOpI;
634  ImpOpI++;
635  UseMI->RemoveOperand(UseMI->getOperandNo(Tmp));
636  }
637  CopiesToReplace.push_back(UseMI);
638  } else {
639  if (UseMI->isCopy() && OpToFold.isReg() &&
641  TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) &&
642  TRI->isVectorRegister(*MRI, UseMI->getOperand(1).getReg()) &&
643  !UseMI->getOperand(1).getSubReg()) {
644  unsigned Size = TII->getOpSize(*UseMI, 1);
645  UseMI->getOperand(1).setReg(OpToFold.getReg());
646  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
647  UseMI->getOperand(1).setIsKill(false);
648  CopiesToReplace.push_back(UseMI);
649  OpToFold.setIsKill(false);
650  if (Size != 4)
651  return;
652  if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
653  TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()))
654  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
655  else if (TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
656  TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
657  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32));
658  return;
659  }
660 
661  unsigned UseOpc = UseMI->getOpcode();
662  if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
663  (UseOpc == AMDGPU::V_READLANE_B32 &&
664  (int)UseOpIdx ==
665  AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
666  // %vgpr = V_MOV_B32 imm
667  // %sgpr = V_READFIRSTLANE_B32 %vgpr
668  // =>
669  // %sgpr = S_MOV_B32 imm
670  if (FoldingImmLike) {
671  if (execMayBeModifiedBeforeUse(*MRI,
672  UseMI->getOperand(UseOpIdx).getReg(),
673  *OpToFold.getParent(),
674  *UseMI))
675  return;
676 
677  UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
678 
679  // FIXME: ChangeToImmediate should clear subreg
680  UseMI->getOperand(1).setSubReg(0);
681  if (OpToFold.isImm())
682  UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
683  else
684  UseMI->getOperand(1).ChangeToFrameIndex(OpToFold.getIndex());
685  UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
686  return;
687  }
688 
689  if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
690  if (execMayBeModifiedBeforeUse(*MRI,
691  UseMI->getOperand(UseOpIdx).getReg(),
692  *OpToFold.getParent(),
693  *UseMI))
694  return;
695 
696  // %vgpr = COPY %sgpr0
697  // %sgpr1 = V_READFIRSTLANE_B32 %vgpr
698  // =>
699  // %sgpr1 = COPY %sgpr0
700  UseMI->setDesc(TII->get(AMDGPU::COPY));
701  UseMI->getOperand(1).setReg(OpToFold.getReg());
702  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
703  UseMI->getOperand(1).setIsKill(false);
704  UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
705  return;
706  }
707  }
708 
709  const MCInstrDesc &UseDesc = UseMI->getDesc();
710 
711  // Don't fold into target independent nodes. Target independent opcodes
712  // don't have defined register classes.
713  if (UseDesc.isVariadic() ||
714  UseOp.isImplicit() ||
715  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
716  return;
717  }
718 
719  if (!FoldingImmLike) {
720  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
721 
722  // FIXME: We could try to change the instruction from 64-bit to 32-bit
723  // to enable more folding opportunities. The shrink operands pass
724  // already does this.
725  return;
726  }
727 
728 
729  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
730  const TargetRegisterClass *FoldRC =
731  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
732 
733  // Split 64-bit constants into 32-bits for folding.
734  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
735  Register UseReg = UseOp.getReg();
736  const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
737 
738  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
739  return;
740 
741  APInt Imm(64, OpToFold.getImm());
742  if (UseOp.getSubReg() == AMDGPU::sub0) {
743  Imm = Imm.getLoBits(32);
744  } else {
745  assert(UseOp.getSubReg() == AMDGPU::sub1);
746  Imm = Imm.getHiBits(32);
747  }
748 
749  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
750  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
751  return;
752  }
753 
754 
755 
756  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
757 }
758 
759 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
760  uint32_t LHS, uint32_t RHS) {
761  switch (Opcode) {
762  case AMDGPU::V_AND_B32_e64:
763  case AMDGPU::V_AND_B32_e32:
764  case AMDGPU::S_AND_B32:
765  Result = LHS & RHS;
766  return true;
767  case AMDGPU::V_OR_B32_e64:
768  case AMDGPU::V_OR_B32_e32:
769  case AMDGPU::S_OR_B32:
770  Result = LHS | RHS;
771  return true;
772  case AMDGPU::V_XOR_B32_e64:
773  case AMDGPU::V_XOR_B32_e32:
774  case AMDGPU::S_XOR_B32:
775  Result = LHS ^ RHS;
776  return true;
777  case AMDGPU::V_LSHL_B32_e64:
778  case AMDGPU::V_LSHL_B32_e32:
779  case AMDGPU::S_LSHL_B32:
780  // The instruction ignores the high bits for out of bounds shifts.
781  Result = LHS << (RHS & 31);
782  return true;
783  case AMDGPU::V_LSHLREV_B32_e64:
784  case AMDGPU::V_LSHLREV_B32_e32:
785  Result = RHS << (LHS & 31);
786  return true;
787  case AMDGPU::V_LSHR_B32_e64:
788  case AMDGPU::V_LSHR_B32_e32:
789  case AMDGPU::S_LSHR_B32:
790  Result = LHS >> (RHS & 31);
791  return true;
792  case AMDGPU::V_LSHRREV_B32_e64:
793  case AMDGPU::V_LSHRREV_B32_e32:
794  Result = RHS >> (LHS & 31);
795  return true;
796  case AMDGPU::V_ASHR_I32_e64:
797  case AMDGPU::V_ASHR_I32_e32:
798  case AMDGPU::S_ASHR_I32:
799  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
800  return true;
801  case AMDGPU::V_ASHRREV_I32_e64:
802  case AMDGPU::V_ASHRREV_I32_e32:
803  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
804  return true;
805  default:
806  return false;
807  }
808 }
809 
810 static unsigned getMovOpc(bool IsScalar) {
811  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
812 }
813 
814 /// Remove any leftover implicit operands from mutating the instruction. e.g.
815 /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
816 /// anymore.
817 static void stripExtraCopyOperands(MachineInstr &MI) {
818  const MCInstrDesc &Desc = MI.getDesc();
819  unsigned NumOps = Desc.getNumOperands() +
820  Desc.getNumImplicitUses() +
821  Desc.getNumImplicitDefs();
822 
823  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
824  MI.RemoveOperand(I);
825 }
826 
827 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
828  MI.setDesc(NewDesc);
829  stripExtraCopyOperands(MI);
830 }
831 
832 static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
833  MachineOperand &Op) {
834  if (Op.isReg()) {
835  // If this has a subregister, it obviously is a register source.
836  if (Op.getSubReg() != AMDGPU::NoSubRegister ||
837  !Register::isVirtualRegister(Op.getReg()))
838  return &Op;
839 
840  MachineInstr *Def = MRI.getVRegDef(Op.getReg());
841  if (Def && Def->isMoveImmediate()) {
842  MachineOperand &ImmSrc = Def->getOperand(1);
843  if (ImmSrc.isImm())
844  return &ImmSrc;
845  }
846  }
847 
848  return &Op;
849 }
850 
851 // Try to simplify operations with a constant that may appear after instruction
852 // selection.
853 // TODO: See if a frame index with a fixed offset can fold.
854 static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
855  const SIInstrInfo *TII,
856  MachineInstr *MI,
857  MachineOperand *ImmOp) {
858  unsigned Opc = MI->getOpcode();
859  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
860  Opc == AMDGPU::S_NOT_B32) {
861  MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
862  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
863  return true;
864  }
865 
866  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
867  if (Src1Idx == -1)
868  return false;
869 
870  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
871  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
872  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
873 
874  if (!Src0->isImm() && !Src1->isImm())
875  return false;
876 
877  if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
878  if (Src0->isImm() && Src0->getImm() == 0) {
879  // v_lshl_or_b32 0, X, Y -> copy Y
880  // v_lshl_or_b32 0, X, K -> v_mov_b32 K
881  bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
882  MI->RemoveOperand(Src1Idx);
883  MI->RemoveOperand(Src0Idx);
884 
885  MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
886  return true;
887  }
888  }
889 
890  // and k0, k1 -> v_mov_b32 (k0 & k1)
891  // or k0, k1 -> v_mov_b32 (k0 | k1)
892  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
893  if (Src0->isImm() && Src1->isImm()) {
894  int32_t NewImm;
895  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
896  return false;
897 
898  const SIRegisterInfo &TRI = TII->getRegisterInfo();
899  bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
900 
901  // Be careful to change the right operand, src0 may belong to a different
902  // instruction.
903  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
904  MI->RemoveOperand(Src1Idx);
905  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
906  return true;
907  }
908 
909  if (!MI->isCommutable())
910  return false;
911 
912  if (Src0->isImm() && !Src1->isImm()) {
913  std::swap(Src0, Src1);
914  std::swap(Src0Idx, Src1Idx);
915  }
916 
917  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
918  if (Opc == AMDGPU::V_OR_B32_e64 ||
919  Opc == AMDGPU::V_OR_B32_e32 ||
920  Opc == AMDGPU::S_OR_B32) {
921  if (Src1Val == 0) {
922  // y = or x, 0 => y = copy x
923  MI->RemoveOperand(Src1Idx);
924  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
925  } else if (Src1Val == -1) {
926  // y = or x, -1 => y = v_mov_b32 -1
927  MI->RemoveOperand(Src1Idx);
928  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
929  } else
930  return false;
931 
932  return true;
933  }
934 
935  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
936  MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
937  MI->getOpcode() == AMDGPU::S_AND_B32) {
938  if (Src1Val == 0) {
939  // y = and x, 0 => y = v_mov_b32 0
940  MI->RemoveOperand(Src0Idx);
941  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
942  } else if (Src1Val == -1) {
943  // y = and x, -1 => y = copy x
944  MI->RemoveOperand(Src1Idx);
945  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
946  stripExtraCopyOperands(*MI);
947  } else
948  return false;
949 
950  return true;
951  }
952 
953  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
954  MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
955  MI->getOpcode() == AMDGPU::S_XOR_B32) {
956  if (Src1Val == 0) {
957  // y = xor x, 0 => y = copy x
958  MI->RemoveOperand(Src1Idx);
959  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
960  return true;
961  }
962  }
963 
964  return false;
965 }
966 
967 // Try to fold an instruction into a simpler one
968 static bool tryFoldInst(const SIInstrInfo *TII,
969  MachineInstr *MI) {
970  unsigned Opc = MI->getOpcode();
971 
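 // A cndmask whose two source operands are identical (and carry no source
 // modifiers) produces the same value for either condition, so it degenerates
 // into a plain copy or move.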
972  if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
973  Opc == AMDGPU::V_CNDMASK_B32_e64 ||
974  Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
975  const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
976  const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
977  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
978  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
979  if (Src1->isIdenticalTo(*Src0) &&
980  (Src1ModIdx == -1 || !MI->getOperand(Src1ModIdx).getImm()) &&
981  (Src0ModIdx == -1 || !MI->getOperand(Src0ModIdx).getImm())) {
982  LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
983  auto &NewDesc =
984  TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
985  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
986  if (Src2Idx != -1)
987  MI->RemoveOperand(Src2Idx);
988  MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
989  if (Src1ModIdx != -1)
990  MI->RemoveOperand(Src1ModIdx);
991  if (Src0ModIdx != -1)
992  MI->RemoveOperand(Src0ModIdx);
993  mutateCopyOp(*MI, NewDesc);
994  LLVM_DEBUG(dbgs() << *MI << '\n');
995  return true;
996  }
997  }
998 
999  return false;
1000 }
1001 
1002 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
1003  MachineOperand &OpToFold) const {
1004  // We need to mutate the operands of new mov instructions to add implicit
1005  // uses of EXEC, but adding them invalidates the use_iterator, so defer
1006  // this.
1007  SmallVector<MachineInstr *, 4> CopiesToReplace;
1008  SmallVector<FoldCandidate, 4> FoldList;
1009  MachineOperand &Dst = MI.getOperand(0);
1010 
1011  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1012  if (FoldingImm) {
1013  unsigned NumLiteralUses = 0;
1014  MachineOperand *NonInlineUse = nullptr;
1015  int NonInlineUseOpNo = -1;
1016 
1017  MachineRegisterInfo::use_iterator NextUse;
1018  for (MachineRegisterInfo::use_iterator
1019  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
1020  Use != E; Use = NextUse) {
1021  NextUse = std::next(Use);
1022  MachineInstr *UseMI = Use->getParent();
1023  unsigned OpNo = Use.getOperandNo();
1024 
1025  // Folding the immediate may reveal operations that can be constant
1026  // folded or replaced with a copy. This can happen for example after
1027  // frame indices are lowered to constants or from splitting 64-bit
1028  // constants.
1029  //
1030  // We may also encounter cases where one or both operands are
1031  // immediates materialized into a register, which would ordinarily not
1032  // be folded due to multiple uses or operand constraints.
1033 
1034  if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
1035  LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');
1036 
1037  // Some constant folding cases change the same immediate's use to a new
1038  // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
1039  // again. The same constant folded instruction could also have a second
1040  // use operand.
1041  NextUse = MRI->use_begin(Dst.getReg());
1042  FoldList.clear();
1043  continue;
1044  }
1045 
1046  // Try to fold any inline immediate uses, and then only fold other
1047  // constants if they have one use.
1048  //
1049  // The legality of the inline immediate must be checked based on the use
1050  // operand, not the defining instruction, because 32-bit instructions
1051  // with 32-bit inline immediate sources may be used to materialize
1052  // constants used in 16-bit operands.
1053  //
1054  // e.g. it is unsafe to fold:
1055  // s_mov_b32 s0, 1.0 // materializes 0x3f800000
1056  // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
1057 
1058  // Folding immediates with more than one use will increase program size.
1059  // FIXME: This will also reduce register usage, which may be better
1060  // in some cases. A better heuristic is needed.
1061  if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
1062  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
1063  } else if (frameIndexMayFold(TII, *UseMI, OpNo, OpToFold)) {
1064  foldOperand(OpToFold, UseMI, OpNo, FoldList,
1065  CopiesToReplace);
1066  } else {
1067  if (++NumLiteralUses == 1) {
1068  NonInlineUse = &*Use;
1069  NonInlineUseOpNo = OpNo;
1070  }
1071  }
1072  }
1073 
1074  if (NumLiteralUses == 1) {
1075  MachineInstr *UseMI = NonInlineUse->getParent();
1076  foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
1077  }
1078  } else {
1079  // Folding register.
1080  SmallVector<MachineRegisterInfo::use_iterator, 4> UsesToProcess;
1081  for (MachineRegisterInfo::use_iterator
1082  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
1083  Use != E; ++Use) {
1084  UsesToProcess.push_back(Use);
1085  }
1086  for (auto U : UsesToProcess) {
1087  MachineInstr *UseMI = U->getParent();
1088 
1089  foldOperand(OpToFold, UseMI, U.getOperandNo(),
1090  FoldList, CopiesToReplace);
1091  }
1092  }
1093 
1094  MachineFunction *MF = MI.getParent()->getParent();
1095  // Make sure we add EXEC uses to any new v_mov instructions created.
1096  for (MachineInstr *Copy : CopiesToReplace)
1097  Copy->addImplicitDefUseOperands(*MF);
1098 
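 // Apply the collected folds. A fold is skipped if the folded value's
 // defining instruction reads EXEC and EXEC may change between that def and
 // the use, since the value seen at the use point could differ.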
1099  for (FoldCandidate &Fold : FoldList) {
1100  if (Fold.isReg() && Register::isVirtualRegister(Fold.OpToFold->getReg())) {
1101  Register Reg = Fold.OpToFold->getReg();
1102  MachineInstr *DefMI = Fold.OpToFold->getParent();
1103  if (DefMI->readsRegister(AMDGPU::EXEC, TRI) &&
1104  execMayBeModifiedBeforeUse(*MRI, Reg, *DefMI, *Fold.UseMI))
1105  continue;
1106  }
1107  if (updateOperand(Fold, *TII, *TRI, *ST)) {
1108  // Clear kill flags.
1109  if (Fold.isReg()) {
1110  assert(Fold.OpToFold && Fold.OpToFold->isReg());
1111  // FIXME: Probably shouldn't bother trying to fold if not an
1112  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
1113  // copies.
1114  MRI->clearKillFlags(Fold.OpToFold->getReg());
1115  }
1116  LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
1117  << static_cast<int>(Fold.UseOpNo) << " of "
1118  << *Fold.UseMI << '\n');
1119  tryFoldInst(TII, Fold.UseMI);
1120  } else if (Fold.isCommuted()) {
1121  // Restoring instruction's original operand order if fold has failed.
1122  TII->commuteInstruction(*Fold.UseMI, false);
1123  }
1124  }
1125 }
1126 
1127 // Clamp patterns are canonically selected to v_max_* instructions, so only
1128 // handle them.
1129 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
1130  unsigned Op = MI.getOpcode();
1131  switch (Op) {
1132  case AMDGPU::V_MAX_F32_e64:
1133  case AMDGPU::V_MAX_F16_e64:
1134  case AMDGPU::V_MAX_F64:
1135  case AMDGPU::V_PK_MAX_F16: {
1136  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
1137  return nullptr;
1138 
1139  // Make sure sources are identical.
1140  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1141  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1142  if (!Src0->isReg() || !Src1->isReg() ||
1143  Src0->getReg() != Src1->getReg() ||
1144  Src0->getSubReg() != Src1->getSubReg() ||
1145  Src0->getSubReg() != AMDGPU::NoSubRegister)
1146  return nullptr;
1147 
1148  // Can't fold up if we have modifiers.
1149  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1150  return nullptr;
1151 
1152  unsigned Src0Mods
1153  = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
1154  unsigned Src1Mods
1155  = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
1156 
1157  // Having a 0 op_sel_hi would require swizzling the output in the source
1158  // instruction, which we can't do.
1159  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1
1160  : 0u;
1161  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
1162  return nullptr;
1163  return Src0;
1164  }
1165  default:
1166  return nullptr;
1167  }
1168 }
1169 
1170 // We obviously have multiple uses in a clamp since the register is used twice
1171 // in the same instruction.
1172 static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
1173  int Count = 0;
1174  for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
1175  I != E; ++I) {
1176  if (++Count > 1)
1177  return false;
1178  }
1179 
1180  return true;
1181 }
1182 
1183 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
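// If the only non-debug use of a value is a canonical clamp pattern of a
// compatible type, set the clamp bit on the defining instruction and delete
// the max instruction.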
1184 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
1185  const MachineOperand *ClampSrc = isClamp(MI);
1186  if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
1187  return false;
1188 
1189  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
1190 
1191  // The type of clamp must be compatible.
1192  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
1193  return false;
1194 
1195  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
1196  if (!DefClamp)
1197  return false;
1198 
1199  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def
1200  << '\n');
1201 
1202  // Clamp is applied after omod, so it is OK if omod is set.
1203  DefClamp->setImm(1);
1204  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1205  MI.eraseFromParent();
1206  return true;
1207 }
1208 
1209 static int getOModValue(unsigned Opc, int64_t Val) {
1210  switch (Opc) {
1211  case AMDGPU::V_MUL_F32_e64: {
1212  switch (static_cast<uint32_t>(Val)) {
1213  case 0x3f000000: // 0.5
1214  return SIOutMods::DIV2;
1215  case 0x40000000: // 2.0
1216  return SIOutMods::MUL2;
1217  case 0x40800000: // 4.0
1218  return SIOutMods::MUL4;
1219  default:
1220  return SIOutMods::NONE;
1221  }
1222  }
1223  case AMDGPU::V_MUL_F16_e64: {
1224  switch (static_cast<uint16_t>(Val)) {
1225  case 0x3800: // 0.5
1226  return SIOutMods::DIV2;
1227  case 0x4000: // 2.0
1228  return SIOutMods::MUL2;
1229  case 0x4400: // 4.0
1230  return SIOutMods::MUL4;
1231  default:
1232  return SIOutMods::NONE;
1233  }
1234  }
1235  default:
1236  llvm_unreachable("invalid mul opcode");
1237  }
1238 }
1239 
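// omod is the VOP3 output modifier: it scales a result by 0.5, 2.0, or 4.0.
// A multiply or add by such a constant can therefore be absorbed into the
// omod field of the instruction that defines its source.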
1240 // FIXME: Does this really not support denormals with f16?
1241 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
1242 // handled, so will anything other than that break?
1243 std::pair<const MachineOperand *, int>
1244 SIFoldOperands::isOMod(const MachineInstr &MI) const {
1245  unsigned Op = MI.getOpcode();
1246  switch (Op) {
1247  case AMDGPU::V_MUL_F32_e64:
1248  case AMDGPU::V_MUL_F16_e64: {
1249  // If output denormals are enabled, omod is ignored.
1250  if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
1251  (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
1252  return std::make_pair(nullptr, SIOutMods::NONE);
1253 
1254  const MachineOperand *RegOp = nullptr;
1255  const MachineOperand *ImmOp = nullptr;
1256  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1257  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1258  if (Src0->isImm()) {
1259  ImmOp = Src0;
1260  RegOp = Src1;
1261  } else if (Src1->isImm()) {
1262  ImmOp = Src1;
1263  RegOp = Src0;
1264  } else
1265  return std::make_pair(nullptr, SIOutMods::NONE);
1266 
1267  int OMod = getOModValue(Op, ImmOp->getImm());
1268  if (OMod == SIOutMods::NONE ||
1269  TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
1270  TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
1271  TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
1272  TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
1273  return std::make_pair(nullptr, SIOutMods::NONE);
1274 
1275  return std::make_pair(RegOp, OMod);
1276  }
1277  case AMDGPU::V_ADD_F32_e64:
1278  case AMDGPU::V_ADD_F16_e64: {
1279  // If output denormals are enabled, omod is ignored.
1280  if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
1281  (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
1282  return std::make_pair(nullptr, SIOutMods::NONE);
1283 
1284  // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
1285  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1286  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1287 
1288  if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
1289  Src0->getSubReg() == Src1->getSubReg() &&
1290  !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
1291  !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
1292  !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
1293  !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1294  return std::make_pair(Src0, SIOutMods::MUL2);
1295 
1296  return std::make_pair(nullptr, SIOutMods::NONE);
1297  }
1298  default:
1299  return std::make_pair(nullptr, SIOutMods::NONE);
1300  }
1301 }
1302 
1303 // FIXME: Does this need to check IEEE bit on function?
1304 bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1305  const MachineOperand *RegOp;
1306  int OMod;
1307  std::tie(RegOp, OMod) = isOMod(MI);
1308  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
1309  RegOp->getSubReg() != AMDGPU::NoSubRegister ||
1310  !hasOneNonDBGUseInst(*MRI, RegOp->getReg()))
1311  return false;
1312 
1313  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
1314  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
1315  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
1316  return false;
1317 
1318  // Clamp is applied after omod. If the source already has clamp set, don't
1319  // fold it.
1320  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
1321  return false;
1322 
1323  LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
1324 
1325  DefOMod->setImm(OMod);
1326  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1327  MI.eraseFromParent();
1328  return true;
1329 }
1330 
1331 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
1332  if (skipFunction(MF.getFunction()))
1333  return false;
1334 
1335  MRI = &MF.getRegInfo();
1336  ST = &MF.getSubtarget<GCNSubtarget>();
1337  TII = ST->getInstrInfo();
1338  TRI = &TII->getRegisterInfo();
1339  MFI = MF.getInfo<SIMachineFunctionInfo>();
1340 
1341  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
1342  // correctly handle signed zeros.
1343  //
1344  // FIXME: Also need to check strictfp
1345  bool IsIEEEMode = MFI->getMode().IEEE;
1346  bool HasNSZ = MFI->hasNoSignedZerosFPMath();
1347 
1348  for (MachineBasicBlock *MBB : depth_first(&MF)) {
1349  MachineBasicBlock::iterator I, Next;
1350  for (I = MBB->begin(); I != MBB->end(); I = Next) {
1351  Next = std::next(I);
1352  MachineInstr &MI = *I;
1353 
1354  tryFoldInst(TII, &MI);
1355 
1356  if (!TII->isFoldableCopy(MI)) {
1357  // TODO: Omod might be OK if there is NSZ only on the source
1358  // instruction, and not the omod multiply.
1359  if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1360  !tryFoldOMod(MI))
1361  tryFoldClamp(MI);
1362  continue;
1363  }
1364 
1365  MachineOperand &OpToFold = MI.getOperand(1);
1366  bool FoldingImm =
1367  OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1368 
1369  // FIXME: We could also be folding things like TargetIndexes.
1370  if (!FoldingImm && !OpToFold.isReg())
1371  continue;
1372 
1373  if (OpToFold.isReg() && !Register::isVirtualRegister(OpToFold.getReg()))
1374  continue;
1375 
1376  // Prevent folding operands backwards in the function. For example,
1377  // the COPY opcode must not be replaced by 1 in this example:
1378  //
1379  // %3 = COPY %vgpr0; VGPR_32:%3
1380  // ...
1381  // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1382  MachineOperand &Dst = MI.getOperand(0);
1383  if (Dst.isReg() && !Register::isVirtualRegister(Dst.getReg()))
1384  continue;
1385 
1386  foldInstOperand(MI, OpToFold);
1387  }
1388  }
1389  return false;
1390 }