SIFoldOperands.cpp
1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9 //
10 
11 #include "AMDGPU.h"
12 #include "GCNSubtarget.h"
13 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14 #include "SIMachineFunctionInfo.h"
15 #include "llvm/ADT/DepthFirstIterator.h"
16 #include "llvm/CodeGen/MachineFunctionPass.h"
17 
18 #define DEBUG_TYPE "si-fold-operands"
19 using namespace llvm;
20 
21 namespace {
22 
23 struct FoldCandidate {
24  MachineInstr *UseMI;
25  union {
26  MachineOperand *OpToFold;
27  uint64_t ImmToFold;
28  int FrameIndexToFold;
29  };
30  int ShrinkOpcode;
31  unsigned UseOpNo;
32  MachineOperand::MachineOperandType Kind;
33  bool Commuted;
34 
35  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
36  bool Commuted_ = false,
37  int ShrinkOp = -1) :
38  UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
39  Kind(FoldOp->getType()),
40  Commuted(Commuted_) {
41  if (FoldOp->isImm()) {
42  ImmToFold = FoldOp->getImm();
43  } else if (FoldOp->isFI()) {
44  FrameIndexToFold = FoldOp->getIndex();
45  } else {
46  assert(FoldOp->isReg() || FoldOp->isGlobal());
47  OpToFold = FoldOp;
48  }
49  }
50 
51  bool isFI() const {
52  return Kind == MachineOperand::MO_FrameIndex;
53  }
54 
55  bool isImm() const {
56  return Kind == MachineOperand::MO_Immediate;
57  }
58 
59  bool isReg() const {
60  return Kind == MachineOperand::MO_Register;
61  }
62 
63  bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
64 
65  bool isCommuted() const {
66  return Commuted;
67  }
68 
69  bool needsShrink() const {
70  return ShrinkOpcode != -1;
71  }
72 
73  int getShrinkOpcode() const {
74  return ShrinkOpcode;
75  }
76 };
77 
78 class SIFoldOperands : public MachineFunctionPass {
79 public:
80  static char ID;
81  MachineRegisterInfo *MRI;
82  const SIInstrInfo *TII;
83  const SIRegisterInfo *TRI;
84  const GCNSubtarget *ST;
85  const SIMachineFunctionInfo *MFI;
86 
87  void foldOperand(MachineOperand &OpToFold,
88  MachineInstr *UseMI,
89  int UseOpIdx,
90  SmallVectorImpl<FoldCandidate> &FoldList,
91  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
92 
93  bool tryFoldCndMask(MachineInstr &MI) const;
94  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
95 
96  const MachineOperand *isClamp(const MachineInstr &MI) const;
97  bool tryFoldClamp(MachineInstr &MI);
98 
99  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
100  bool tryFoldOMod(MachineInstr &MI);
101  bool tryFoldRegSequence(MachineInstr &MI);
102  bool tryFoldLCSSAPhi(MachineInstr &MI);
103  bool tryFoldLoad(MachineInstr &MI);
104 
105 public:
106  SIFoldOperands() : MachineFunctionPass(ID) {
107  initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
108  }
109 
110  bool runOnMachineFunction(MachineFunction &MF) override;
111 
112  StringRef getPassName() const override { return "SI Fold Operands"; }
113 
114  void getAnalysisUsage(AnalysisUsage &AU) const override {
115  AU.setPreservesCFG();
116  MachineFunctionPass::getAnalysisUsage(AU);
117  }
118 };
119 
120 } // End anonymous namespace.
121 
122 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
123  "SI Fold Operands", false, false)
124 
125 char SIFoldOperands::ID = 0;
126 
127 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
128 
129 // Map multiply-accumulate opcode to corresponding multiply-add opcode if any.
130 static unsigned macToMad(unsigned Opc) {
131  switch (Opc) {
132  case AMDGPU::V_MAC_F32_e64:
133  return AMDGPU::V_MAD_F32_e64;
134  case AMDGPU::V_MAC_F16_e64:
135  return AMDGPU::V_MAD_F16_e64;
136  case AMDGPU::V_FMAC_F32_e64:
137  return AMDGPU::V_FMA_F32_e64;
138  case AMDGPU::V_FMAC_F16_e64:
139  return AMDGPU::V_FMA_F16_gfx9_e64;
140  case AMDGPU::V_FMAC_LEGACY_F32_e64:
141  return AMDGPU::V_FMA_LEGACY_F32_e64;
142  case AMDGPU::V_FMAC_F64_e64:
143  return AMDGPU::V_FMA_F64_e64;
144  }
145  return AMDGPU::INSTRUCTION_LIST_END;
146 }
147 
148 // Wrapper around isInlineConstant that understands special cases when
149 // instruction types are replaced during operand folding.
150 static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
151  const MachineInstr &UseMI,
152  unsigned OpNo,
153  const MachineOperand &OpToFold) {
154  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
155  return true;
156 
157  unsigned Opc = UseMI.getOpcode();
158  unsigned NewOpc = macToMad(Opc);
159  if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
160  // Special case for mac. Since this is replaced with mad when folded into
161  // src2, we need to check the legality for the final instruction.
162  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
163  if (static_cast<int>(OpNo) == Src2Idx) {
164  const MCInstrDesc &MadDesc = TII->get(NewOpc);
165  return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
166  }
167  }
168 
169  return false;
170 }
171 
172 // TODO: Add heuristic that the frame index might not fit in the addressing mode
173 // immediate offset to avoid materializing in loops.
174 static bool frameIndexMayFold(const SIInstrInfo *TII,
175  const MachineInstr &UseMI,
176  int OpNo,
177  const MachineOperand &OpToFold) {
178  if (!OpToFold.isFI())
179  return false;
180 
181  if (TII->isMUBUF(UseMI))
182  return OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
183  AMDGPU::OpName::vaddr);
184  if (!TII->isFLATScratch(UseMI))
185  return false;
186 
187  int SIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
188  AMDGPU::OpName::saddr);
189  if (OpNo == SIdx)
190  return true;
191 
192  int VIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
193  AMDGPU::OpName::vaddr);
194  return OpNo == VIdx && SIdx == -1;
195 }
196 
197 FunctionPass *llvm::createSIFoldOperandsPass() {
198  return new SIFoldOperands();
199 }
200 
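// Apply a queued fold to its use operand in place: materialize the immediate,
// frame index, global address, or register, handling the packed-f16 op_sel
// adjustment and the shrink-to-VOP2 carry-out case along the way.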
201 static bool updateOperand(FoldCandidate &Fold,
202  const SIInstrInfo &TII,
203  const TargetRegisterInfo &TRI,
204  const GCNSubtarget &ST) {
205  MachineInstr *MI = Fold.UseMI;
206  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
207  assert(Old.isReg());
208 
209  if (Fold.isImm()) {
210  if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
211  !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) &&
212  AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
213  ST.hasInv2PiInlineImm())) {
214  // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
215  // already set.
216  unsigned Opcode = MI->getOpcode();
217  int OpNo = MI->getOperandNo(&Old);
218  int ModIdx = -1;
219  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
220  ModIdx = AMDGPU::OpName::src0_modifiers;
221  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
222  ModIdx = AMDGPU::OpName::src1_modifiers;
223  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
224  ModIdx = AMDGPU::OpName::src2_modifiers;
225  assert(ModIdx != -1);
226  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
227  MachineOperand &Mod = MI->getOperand(ModIdx);
228  unsigned Val = Mod.getImm();
229  if (!(Val & SISrcMods::OP_SEL_0) && (Val & SISrcMods::OP_SEL_1)) {
230  // Only apply the following transformation if that operand requires
231  // a packed immediate.
232  switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
233  case AMDGPU::OPERAND_REG_IMM_V2FP16:
234  case AMDGPU::OPERAND_REG_IMM_V2INT16:
235  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
236  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
237  // If upper part is all zero we do not need op_sel_hi.
238  if (!isUInt<16>(Fold.ImmToFold)) {
239  if (!(Fold.ImmToFold & 0xffff)) {
240  Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
241  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
242  Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
243  return true;
244  }
245  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
246  Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
247  return true;
248  }
249  break;
250  default:
251  break;
252  }
253  }
254  }
255  }
256 
257  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
258  MachineBasicBlock *MBB = MI->getParent();
259  auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI, 16);
260  if (Liveness != MachineBasicBlock::LQR_Dead) {
261  LLVM_DEBUG(dbgs() << "Not shrinking " << MI << " due to vcc liveness\n");
262  return false;
263  }
264 
265  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
266  int Op32 = Fold.getShrinkOpcode();
267  MachineOperand &Dst0 = MI->getOperand(0);
268  MachineOperand &Dst1 = MI->getOperand(1);
269  assert(Dst0.isDef() && Dst1.isDef());
270 
271  bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
272 
273  const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
274  Register NewReg0 = MRI.createVirtualRegister(Dst0RC);
275 
276  MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
277 
278  if (HaveNonDbgCarryUse) {
279  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
280  .addReg(AMDGPU::VCC, RegState::Kill);
281  }
282 
283  // Keep the old instruction around to avoid breaking iterators, but
284  // replace it with a dummy instruction to remove uses.
285  //
286  // FIXME: We should not invert how this pass looks at operands to avoid
287  // this. Should track set of foldable movs instead of looking for uses
288  // when looking at a use.
289  Dst0.setReg(NewReg0);
290  for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
291  MI->RemoveOperand(I);
292  MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
293 
294  if (Fold.isCommuted())
295  TII.commuteInstruction(*Inst32, false);
296  return true;
297  }
298 
299  assert(!Fold.needsShrink() && "not handled");
300 
301  if (Fold.isImm()) {
302  Old.ChangeToImmediate(Fold.ImmToFold);
303  return true;
304  }
305 
306  if (Fold.isGlobal()) {
307  Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
308  Fold.OpToFold->getTargetFlags());
309  return true;
310  }
311 
312  if (Fold.isFI()) {
313  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
314  return true;
315  }
316 
317  MachineOperand *New = Fold.OpToFold;
318  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
319  Old.setIsUndef(New->isUndef());
320  return true;
321 }
322 
323 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
324  const MachineInstr *MI) {
325  for (auto Candidate : FoldList) {
326  if (Candidate.UseMI == MI)
327  return true;
328  }
329  return false;
330 }
331 
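// Queue a fold of FoldOp into operand OpNo of MI unless a fold for that
// operand has already been recorded.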
332 static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
333  MachineInstr *MI, unsigned OpNo,
334  MachineOperand *FoldOp, bool Commuted = false,
335  int ShrinkOp = -1) {
336  // Skip additional folding on the same operand.
337  for (FoldCandidate &Fold : FoldList)
338  if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)
339  return;
340  LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
341  << " operand " << OpNo << "\n " << *MI);
342  FoldList.emplace_back(MI, OpNo, FoldOp, Commuted, ShrinkOp);
343 }
344 
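// Try to make OpToFold legal for operand OpNo of MI, switching mac to mad,
// using the s_setreg immediate form, or commuting the instruction when that
// is what it takes, and queue the resulting fold candidate.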
345 static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
346  MachineInstr *MI, unsigned OpNo,
347  MachineOperand *OpToFold,
348  const SIInstrInfo *TII) {
349  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
350  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
351  unsigned Opc = MI->getOpcode();
352  unsigned NewOpc = macToMad(Opc);
353  if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
354  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
355  // to fold the operand.
356  MI->setDesc(TII->get(NewOpc));
357  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
358  if (FoldAsMAD) {
359  MI->untieRegOperand(OpNo);
360  return true;
361  }
362  MI->setDesc(TII->get(Opc));
363  }
364 
365  // Special case for s_setreg_b32
366  if (OpToFold->isImm()) {
367  unsigned ImmOpc = 0;
368  if (Opc == AMDGPU::S_SETREG_B32)
369  ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
370  else if (Opc == AMDGPU::S_SETREG_B32_mode)
371  ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
372  if (ImmOpc) {
373  MI->setDesc(TII->get(ImmOpc));
374  appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
375  return true;
376  }
377  }
378 
379  // If we are already folding into another operand of MI, then
380  // we can't commute the instruction, otherwise we risk making the
381  // other fold illegal.
382  if (isUseMIInFoldList(FoldList, MI))
383  return false;
384 
385  unsigned CommuteOpNo = OpNo;
386 
387  // Operand is not legal, so try to commute the instruction to
388  // see if this makes it possible to fold.
389  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
390  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
391  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
392 
393  if (CanCommute) {
394  if (CommuteIdx0 == OpNo)
395  CommuteOpNo = CommuteIdx1;
396  else if (CommuteIdx1 == OpNo)
397  CommuteOpNo = CommuteIdx0;
398  }
399 
400 
401  // One of the operands might be an Imm operand, and OpNo may refer to it after
402  // the call of commuteInstruction() below. Such situations are avoided
403  // here explicitly as OpNo must be a register operand to be a candidate
404  // for memory folding.
405  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
406  !MI->getOperand(CommuteIdx1).isReg()))
407  return false;
408 
409  if (!CanCommute ||
410  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
411  return false;
412 
413  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
414  if ((Opc == AMDGPU::V_ADD_CO_U32_e64 ||
415  Opc == AMDGPU::V_SUB_CO_U32_e64 ||
416  Opc == AMDGPU::V_SUBREV_CO_U32_e64) && // FIXME
417  (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
418  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
419 
420  // Verify the other operand is a VGPR, otherwise we would violate the
421  // constant bus restriction.
422  unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
423  MachineOperand &OtherOp = MI->getOperand(OtherIdx);
424  if (!OtherOp.isReg() ||
425  !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
426  return false;
427 
428  assert(MI->getOperand(1).isDef());
429 
430  // Make sure to get the 32-bit version of the commuted opcode.
431  unsigned MaybeCommutedOpc = MI->getOpcode();
432  int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
433 
434  appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
435  return true;
436  }
437 
438  TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
439  return false;
440  }
441 
442  appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true);
443  return true;
444  }
445 
446  // Check the case where we might introduce a second constant operand to a
447  // scalar instruction
448  if (TII->isSALU(MI->getOpcode())) {
449  const MCInstrDesc &InstDesc = MI->getDesc();
450  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo];
451  const SIRegisterInfo &SRI = TII->getRegisterInfo();
452 
453  // Fine if the operand can be encoded as an inline constant
454  if (OpToFold->isImm()) {
455  if (!SRI.opCanUseInlineConstant(OpInfo.OperandType) ||
456  !TII->isInlineConstant(*OpToFold, OpInfo)) {
457  // Otherwise check for another constant
458  for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
459  auto &Op = MI->getOperand(i);
460  if (OpNo != i &&
461  TII->isLiteralConstantLike(Op, OpInfo)) {
462  return false;
463  }
464  }
465  }
466  }
467  }
468 
469  appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
470  return true;
471 }
472 
473 // If the use operand doesn't care about the value, this may be an operand only
474 // used for register indexing, in which case it is unsafe to fold.
475 static bool isUseSafeToFold(const SIInstrInfo *TII,
476  const MachineInstr &MI,
477  const MachineOperand &UseMO) {
478  if (UseMO.isUndef() || TII->isSDWA(MI))
479  return false;
480 
481  switch (MI.getOpcode()) {
482  case AMDGPU::V_MOV_B32_e32:
483  case AMDGPU::V_MOV_B32_e64:
484  case AMDGPU::V_MOV_B64_PSEUDO:
485  // Do not fold into an indirect mov.
486  return !MI.hasRegisterImplicitUseOperand(AMDGPU::M0);
487  }
488 
489  return true;
490  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
491 }
492 
493 // Find a def of the UseReg, check if it is a reg_sequence and find initializers
494 // for each subreg, tracking it to foldable inline immediate if possible.
495 // Returns true on success.
496 static bool getRegSeqInit(
497  SmallVectorImpl<std::pair<MachineOperand*, unsigned>> &Defs,
498  Register UseReg, uint8_t OpTy,
499  const SIInstrInfo *TII, const MachineRegisterInfo &MRI) {
500  MachineInstr *Def = MRI.getVRegDef(UseReg);
501  if (!Def || !Def->isRegSequence())
502  return false;
503 
504  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
505  MachineOperand *Sub = &Def->getOperand(I);
506  assert (Sub->isReg());
507 
508  for (MachineInstr *SubDef = MRI.getVRegDef(Sub->getReg());
509  SubDef && Sub->isReg() && !Sub->getSubReg() &&
510  TII->isFoldableCopy(*SubDef);
511  SubDef = MRI.getVRegDef(Sub->getReg())) {
512  MachineOperand *Op = &SubDef->getOperand(1);
513  if (Op->isImm()) {
514  if (TII->isInlineConstant(*Op, OpTy))
515  Sub = Op;
516  break;
517  }
518  if (!Op->isReg())
519  break;
520  Sub = Op;
521  }
522 
523  Defs.emplace_back(Sub, Def->getOperand(I + 1).getImm());
524  }
525 
526  return true;
527 }
528 
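// Try to fold an inline immediate into an inline-constant (AC/C) operand of
// UseMI, looking through a foldable copy or a splat reg_sequence that defines
// the source register.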
529 static bool tryToFoldACImm(const SIInstrInfo *TII,
530  const MachineOperand &OpToFold,
531  MachineInstr *UseMI,
532  unsigned UseOpIdx,
533  SmallVectorImpl<FoldCandidate> &FoldList) {
534  const MCInstrDesc &Desc = UseMI->getDesc();
535  const MCOperandInfo *OpInfo = Desc.OpInfo;
536  if (!OpInfo || UseOpIdx >= Desc.getNumOperands())
537  return false;
538 
539  uint8_t OpTy = OpInfo[UseOpIdx].OperandType;
540  if ((OpTy < AMDGPU::OPERAND_REG_INLINE_AC_FIRST ||
541  OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST) &&
542  (OpTy < AMDGPU::OPERAND_REG_INLINE_C_FIRST ||
543  OpTy > AMDGPU::OPERAND_REG_INLINE_C_LAST))
544  return false;
545 
546  if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
547  TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
548  UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
549  return true;
550  }
551 
552  if (!OpToFold.isReg())
553  return false;
554 
555  Register UseReg = OpToFold.getReg();
556  if (!UseReg.isVirtual())
557  return false;
558 
559  if (isUseMIInFoldList(FoldList, UseMI))
560  return false;
561 
562  MachineRegisterInfo &MRI = UseMI->getParent()->getParent()->getRegInfo();
563 
564  // Maybe it is just a COPY of an immediate itself.
565  MachineInstr *Def = MRI.getVRegDef(UseReg);
566  MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
567  if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
568  MachineOperand &DefOp = Def->getOperand(1);
569  if (DefOp.isImm() && TII->isInlineConstant(DefOp, OpTy) &&
570  TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
571  UseMI->getOperand(UseOpIdx).ChangeToImmediate(DefOp.getImm());
572  return true;
573  }
574  }
575 
576  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
577  if (!getRegSeqInit(Defs, UseReg, OpTy, TII, MRI))
578  return false;
579 
580  int32_t Imm;
581  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
582  const MachineOperand *Op = Defs[I].first;
583  if (!Op->isImm())
584  return false;
585 
586  auto SubImm = Op->getImm();
587  if (!I) {
588  Imm = SubImm;
589  if (!TII->isInlineConstant(*Op, OpTy) ||
590  !TII->isOperandLegal(*UseMI, UseOpIdx, Op))
591  return false;
592 
593  continue;
594  }
595  if (Imm != SubImm)
596  return false; // Can only fold splat constants
597  }
598 
599  appendFoldCandidate(FoldList, UseMI, UseOpIdx, Defs[0].first);
600  return true;
601 }
602 
603 void SIFoldOperands::foldOperand(
604  MachineOperand &OpToFold,
605  MachineInstr *UseMI,
606  int UseOpIdx,
607  SmallVectorImpl<FoldCandidate> &FoldList,
608  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
609  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
610 
611  if (!isUseSafeToFold(TII, *UseMI, UseOp))
612  return;
613 
614  // FIXME: Fold operands with subregs.
615  if (UseOp.isReg() && OpToFold.isReg()) {
616  if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
617  return;
618  }
619 
620  // Special case for REG_SEQUENCE: We can't fold literals into
621  // REG_SEQUENCE instructions, so we have to fold them into the
622  // uses of REG_SEQUENCE.
623  if (UseMI->isRegSequence()) {
624  Register RegSeqDstReg = UseMI->getOperand(0).getReg();
625  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
626 
627  for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
628  MachineInstr *RSUseMI = RSUse.getParent();
629 
630  if (tryToFoldACImm(TII, UseMI->getOperand(0), RSUseMI,
631  RSUseMI->getOperandNo(&RSUse), FoldList))
632  continue;
633 
634  if (RSUse.getSubReg() != RegSeqDstSubReg)
635  continue;
636 
637  foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
638  CopiesToReplace);
639  }
640 
641  return;
642  }
643 
644  if (tryToFoldACImm(TII, OpToFold, UseMI, UseOpIdx, FoldList))
645  return;
646 
647  if (frameIndexMayFold(TII, *UseMI, UseOpIdx, OpToFold)) {
648  // Sanity check that this is a stack access.
649  // FIXME: Should probably use stack pseudos before frame lowering.
650 
651  if (TII->isMUBUF(*UseMI)) {
652  if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
653  MFI->getScratchRSrcReg())
654  return;
655 
656  // Ensure this is either relative to the current frame or the current
657  // wave.
658  MachineOperand &SOff =
659  *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
660  if (!SOff.isImm() || SOff.getImm() != 0)
661  return;
662  }
663 
664  // A frame index will resolve to a positive constant, so it should always be
665  // safe to fold the addressing mode, even pre-GFX9.
666  UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
667 
668  if (TII->isFLATScratch(*UseMI) &&
669  AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
670  AMDGPU::OpName::vaddr) != -1) {
671  unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode());
672  UseMI->setDesc(TII->get(NewOpc));
673  }
674 
675  return;
676  }
677 
678  bool FoldingImmLike =
679  OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
680 
681  if (FoldingImmLike && UseMI->isCopy()) {
682  Register DestReg = UseMI->getOperand(0).getReg();
683  Register SrcReg = UseMI->getOperand(1).getReg();
684  assert(SrcReg.isVirtual());
685 
686  const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
687 
688  // Don't fold into a copy to a physical register with the same class. Doing
689  // so would interfere with the register coalescer's logic which would avoid
690  // redundant initializations.
691  if (DestReg.isPhysical() && SrcRC->contains(DestReg))
692  return;
693 
694  const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
695  if (!DestReg.isPhysical()) {
696  if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
697  SmallVector<FoldCandidate, 4> CopyUses;
698  for (auto &Use : MRI->use_nodbg_operands(DestReg)) {
699  // There's no point trying to fold into an implicit operand.
700  if (Use.isImplicit())
701  continue;
702 
703  CopyUses.emplace_back(Use.getParent(),
704  Use.getParent()->getOperandNo(&Use),
705  &UseMI->getOperand(1));
706  }
707  for (auto &F : CopyUses) {
708  foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace);
709  }
710  }
711 
712  if (DestRC == &AMDGPU::AGPR_32RegClass &&
713  TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
714  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
715  UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
716  CopiesToReplace.push_back(UseMI);
717  return;
718  }
719  }
720 
721  // In order to fold immediates into copies, we need to change the
722  // copy to a MOV.
723 
724  unsigned MovOp = TII->getMovOpcode(DestRC);
725  if (MovOp == AMDGPU::COPY)
726  return;
727 
728  UseMI->setDesc(TII->get(MovOp));
729  MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
730  MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
731  while (ImpOpI != ImpOpE) {
732  MachineInstr::mop_iterator Tmp = ImpOpI;
733  ImpOpI++;
734  UseMI->RemoveOperand(UseMI->getOperandNo(Tmp));
735  }
736  CopiesToReplace.push_back(UseMI);
737  } else {
738  if (UseMI->isCopy() && OpToFold.isReg() &&
739  UseMI->getOperand(0).getReg().isVirtual() &&
740  !UseMI->getOperand(1).getSubReg()) {
741  LLVM_DEBUG(dbgs() << "Folding " << OpToFold << "\n into " << *UseMI);
742  unsigned Size = TII->getOpSize(*UseMI, 1);
743  Register UseReg = OpToFold.getReg();
744  UseMI->getOperand(1).setReg(UseReg);
745  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
746  UseMI->getOperand(1).setIsKill(false);
747  CopiesToReplace.push_back(UseMI);
748  OpToFold.setIsKill(false);
749 
750  // It is very tricky to store a value into an AGPR: v_accvgpr_write_b32
751  // can only accept a VGPR or an inline immediate. Recreate a reg_sequence
752  // with its initializers right here, so we rematerialize immediates and
753  // avoid copies via different reg classes.
754  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
755  if (Size > 4 && TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
756  getRegSeqInit(Defs, UseReg, AMDGPU::OPERAND_REG_INLINE_C_INT32, TII,
757  *MRI)) {
758  const DebugLoc &DL = UseMI->getDebugLoc();
759  MachineBasicBlock &MBB = *UseMI->getParent();
760 
761  UseMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE));
762  for (unsigned I = UseMI->getNumOperands() - 1; I > 0; --I)
763  UseMI->RemoveOperand(I);
764 
765  MachineInstrBuilder B(*MBB.getParent(), UseMI);
766  DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;
767  SmallSetVector<TargetInstrInfo::RegSubRegPair, 32> SeenAGPRs;
768  for (unsigned I = 0; I < Size / 4; ++I) {
769  MachineOperand *Def = Defs[I].first;
770  TargetInstrInfo::RegSubRegPair CopyToVGPR;
771  if (Def->isImm() &&
772  TII->isInlineConstant(*Def, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
773  int64_t Imm = Def->getImm();
774 
775  auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
776  BuildMI(MBB, UseMI, DL,
777  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addImm(Imm);
778  B.addReg(Tmp);
779  } else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
780  auto Src = getRegSubRegPair(*Def);
781  Def->setIsKill(false);
782  if (!SeenAGPRs.insert(Src)) {
783  // We cannot build a reg_sequence out of the same registers, they
784  // must be copied. Better to do it here, before copyPhysReg() creates
785  // several reads to do the AGPR->VGPR->AGPR copy.
786  CopyToVGPR = Src;
787  } else {
788  B.addReg(Src.Reg, Def->isUndef() ? RegState::Undef : 0,
789  Src.SubReg);
790  }
791  } else {
792  assert(Def->isReg());
793  Def->setIsKill(false);
794  auto Src = getRegSubRegPair(*Def);
795 
796  // Direct copy from SGPR to AGPR is not possible. To avoid creation
797  // of exploded copies SGPR->VGPR->AGPR in the copyPhysReg() later,
798  // create a copy here and track if we already have such a copy.
799  if (TRI->isSGPRReg(*MRI, Src.Reg)) {
800  CopyToVGPR = Src;
801  } else {
802  auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
803  BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Tmp).add(*Def);
804  B.addReg(Tmp);
805  }
806  }
807 
808  if (CopyToVGPR.Reg) {
809  Register Vgpr;
810  if (VGPRCopies.count(CopyToVGPR)) {
811  Vgpr = VGPRCopies[CopyToVGPR];
812  } else {
813  Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
814  BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def);
815  VGPRCopies[CopyToVGPR] = Vgpr;
816  }
817  auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
818  BuildMI(MBB, UseMI, DL,
819  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addReg(Vgpr);
820  B.addReg(Tmp);
821  }
822 
823  B.addImm(Defs[I].second);
824  }
825  LLVM_DEBUG(dbgs() << "Folded " << *UseMI);
826  return;
827  }
828 
829  if (Size != 4)
830  return;
831  if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
832  TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()))
833  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
834  else if (TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
835  TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
836  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64));
837  else if (ST->hasGFX90AInsts() &&
838  TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
839  TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
840  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_MOV_B32));
841  return;
842  }
843 
844  unsigned UseOpc = UseMI->getOpcode();
845  if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
846  (UseOpc == AMDGPU::V_READLANE_B32 &&
847  (int)UseOpIdx ==
848  AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
849  // %vgpr = V_MOV_B32 imm
850  // %sgpr = V_READFIRSTLANE_B32 %vgpr
851  // =>
852  // %sgpr = S_MOV_B32 imm
853  if (FoldingImmLike) {
854  if (execMayBeModifiedBeforeUse(*MRI,
855  UseMI->getOperand(UseOpIdx).getReg(),
856  *OpToFold.getParent(),
857  *UseMI))
858  return;
859 
860  UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
861 
862  if (OpToFold.isImm())
863  UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
864  else
865  UseMI->getOperand(1).ChangeToFrameIndex(OpToFold.getIndex());
866  UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
867  return;
868  }
869 
870  if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
871  if (execMayBeModifiedBeforeUse(*MRI,
872  UseMI->getOperand(UseOpIdx).getReg(),
873  *OpToFold.getParent(),
874  *UseMI))
875  return;
876 
877  // %vgpr = COPY %sgpr0
878  // %sgpr1 = V_READFIRSTLANE_B32 %vgpr
879  // =>
880  // %sgpr1 = COPY %sgpr0
881  UseMI->setDesc(TII->get(AMDGPU::COPY));
882  UseMI->getOperand(1).setReg(OpToFold.getReg());
883  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
884  UseMI->getOperand(1).setIsKill(false);
885  UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
886  return;
887  }
888  }
889 
890  const MCInstrDesc &UseDesc = UseMI->getDesc();
891 
892  // Don't fold into target independent nodes. Target independent opcodes
893  // don't have defined register classes.
894  if (UseDesc.isVariadic() ||
895  UseOp.isImplicit() ||
896  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
897  return;
898  }
899 
900  if (!FoldingImmLike) {
901  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
902 
903  // FIXME: We could try to change the instruction from 64-bit to 32-bit
904  // to enable more folding opportunities. The shrink operands pass
905  // already does this.
906  return;
907  }
908 
909 
910  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
911  const TargetRegisterClass *FoldRC =
912  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
913 
914  // Split 64-bit constants into 32-bits for folding.
915  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
916  Register UseReg = UseOp.getReg();
917  const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
918 
919  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
920  return;
921 
922  APInt Imm(64, OpToFold.getImm());
923  if (UseOp.getSubReg() == AMDGPU::sub0) {
924  Imm = Imm.getLoBits(32);
925  } else {
926  assert(UseOp.getSubReg() == AMDGPU::sub1);
927  Imm = Imm.getHiBits(32);
928  }
929 
930  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
931  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
932  return;
933  }
934 
935 
936 
937  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
938 }
939 
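// Constant-fold the given 32-bit binary opcode; returns true and sets Result
// when the opcode is one of the handled bitwise or shift operations.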
940 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
941  uint32_t LHS, uint32_t RHS) {
942  switch (Opcode) {
943  case AMDGPU::V_AND_B32_e64:
944  case AMDGPU::V_AND_B32_e32:
945  case AMDGPU::S_AND_B32:
946  Result = LHS & RHS;
947  return true;
948  case AMDGPU::V_OR_B32_e64:
949  case AMDGPU::V_OR_B32_e32:
950  case AMDGPU::S_OR_B32:
951  Result = LHS | RHS;
952  return true;
953  case AMDGPU::V_XOR_B32_e64:
954  case AMDGPU::V_XOR_B32_e32:
955  case AMDGPU::S_XOR_B32:
956  Result = LHS ^ RHS;
957  return true;
958  case AMDGPU::S_XNOR_B32:
959  Result = ~(LHS ^ RHS);
960  return true;
961  case AMDGPU::S_NAND_B32:
962  Result = ~(LHS & RHS);
963  return true;
964  case AMDGPU::S_NOR_B32:
965  Result = ~(LHS | RHS);
966  return true;
967  case AMDGPU::S_ANDN2_B32:
968  Result = LHS & ~RHS;
969  return true;
970  case AMDGPU::S_ORN2_B32:
971  Result = LHS | ~RHS;
972  return true;
973  case AMDGPU::V_LSHL_B32_e64:
974  case AMDGPU::V_LSHL_B32_e32:
975  case AMDGPU::S_LSHL_B32:
976  // The instruction ignores the high bits for out of bounds shifts.
977  Result = LHS << (RHS & 31);
978  return true;
979  case AMDGPU::V_LSHLREV_B32_e64:
980  case AMDGPU::V_LSHLREV_B32_e32:
981  Result = RHS << (LHS & 31);
982  return true;
983  case AMDGPU::V_LSHR_B32_e64:
984  case AMDGPU::V_LSHR_B32_e32:
985  case AMDGPU::S_LSHR_B32:
986  Result = LHS >> (RHS & 31);
987  return true;
988  case AMDGPU::V_LSHRREV_B32_e64:
989  case AMDGPU::V_LSHRREV_B32_e32:
990  Result = RHS >> (LHS & 31);
991  return true;
992  case AMDGPU::V_ASHR_I32_e64:
993  case AMDGPU::V_ASHR_I32_e32:
994  case AMDGPU::S_ASHR_I32:
995  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
996  return true;
997  case AMDGPU::V_ASHRREV_I32_e64:
998  case AMDGPU::V_ASHRREV_I32_e32:
999  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
1000  return true;
1001  default:
1002  return false;
1003  }
1004 }
1005 
1006 static unsigned getMovOpc(bool IsScalar) {
1007  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1008 }
1009 
1010 /// Remove any leftover implicit operands from mutating the instruction. e.g.
1011 /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
1012 /// anymore.
1013 static void stripExtraCopyOperands(MachineInstr &MI) {
1014  const MCInstrDesc &Desc = MI.getDesc();
1015  unsigned NumOps = Desc.getNumOperands() +
1016  Desc.getNumImplicitUses() +
1017  Desc.getNumImplicitDefs();
1018 
1019  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
1020  MI.RemoveOperand(I);
1021 }
1022 
1023 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
1024  MI.setDesc(NewDesc);
1025  stripExtraCopyOperands(MI);
1026 }
1027 
1028 static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
1029  MachineOperand &Op) {
1030  if (Op.isReg()) {
1031  // If this has a subregister, it obviously is a register source.
1032  if (Op.getSubReg() != AMDGPU::NoSubRegister || !Op.getReg().isVirtual())
1033  return &Op;
1034 
1035  MachineInstr *Def = MRI.getVRegDef(Op.getReg());
1036  if (Def && Def->isMoveImmediate()) {
1037  MachineOperand &ImmSrc = Def->getOperand(1);
1038  if (ImmSrc.isImm())
1039  return &ImmSrc;
1040  }
1041  }
1042 
1043  return &Op;
1044 }
1045 
1046 // Try to simplify operations with a constant that may appear after instruction
1047 // selection.
1048 // TODO: See if a frame index with a fixed offset can fold.
1049 static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
1050  const SIInstrInfo *TII,
1051  MachineInstr *MI,
1052  MachineOperand *ImmOp) {
1053  unsigned Opc = MI->getOpcode();
1054  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
1055  Opc == AMDGPU::S_NOT_B32) {
1056  MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
1057  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
1058  return true;
1059  }
1060 
1061  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1062  if (Src1Idx == -1)
1063  return false;
1064 
1065  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1066  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
1067  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
1068 
1069  if (!Src0->isImm() && !Src1->isImm())
1070  return false;
1071 
1072  // and k0, k1 -> v_mov_b32 (k0 & k1)
1073  // or k0, k1 -> v_mov_b32 (k0 | k1)
1074  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
1075  if (Src0->isImm() && Src1->isImm()) {
1076  int32_t NewImm;
1077  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
1078  return false;
1079 
1080  const SIRegisterInfo &TRI = TII->getRegisterInfo();
1081  bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
1082 
1083  // Be careful to change the right operand, src0 may belong to a different
1084  // instruction.
1085  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
1086  MI->RemoveOperand(Src1Idx);
1087  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
1088  return true;
1089  }
1090 
1091  if (!MI->isCommutable())
1092  return false;
1093 
1094  if (Src0->isImm() && !Src1->isImm()) {
1095  std::swap(Src0, Src1);
1096  std::swap(Src0Idx, Src1Idx);
1097  }
1098 
1099  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
1100  if (Opc == AMDGPU::V_OR_B32_e64 ||
1101  Opc == AMDGPU::V_OR_B32_e32 ||
1102  Opc == AMDGPU::S_OR_B32) {
1103  if (Src1Val == 0) {
1104  // y = or x, 0 => y = copy x
1105  MI->RemoveOperand(Src1Idx);
1106  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
1107  } else if (Src1Val == -1) {
1108  // y = or x, -1 => y = v_mov_b32 -1
1109  MI->RemoveOperand(Src1Idx);
1110  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
1111  } else
1112  return false;
1113 
1114  return true;
1115  }
1116 
1117  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
1118  MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
1119  MI->getOpcode() == AMDGPU::S_AND_B32) {
1120  if (Src1Val == 0) {
1121  // y = and x, 0 => y = v_mov_b32 0
1122  MI->RemoveOperand(Src0Idx);
1123  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
1124  } else if (Src1Val == -1) {
1125  // y = and x, -1 => y = copy x
1126  MI->RemoveOperand(Src1Idx);
1127  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
1128  stripExtraCopyOperands(*MI);
1129  } else
1130  return false;
1131 
1132  return true;
1133  }
1134 
1135  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
1136  MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
1137  MI->getOpcode() == AMDGPU::S_XOR_B32) {
1138  if (Src1Val == 0) {
1139  // y = xor x, 0 => y = copy x
1140  MI->RemoveOperand(Src1Idx);
1141  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
1142  return true;
1143  }
1144  }
1145 
1146  return false;
1147 }
1148 
1149 // Try to fold an instruction into a simpler one
1150 bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
1151  unsigned Opc = MI.getOpcode();
1152  if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
1153  Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
1154  return false;
1155 
1156  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1157  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1158  if (!Src1->isIdenticalTo(*Src0)) {
1159  auto *Src0Imm = getImmOrMaterializedImm(*MRI, *Src0);
1160  auto *Src1Imm = getImmOrMaterializedImm(*MRI, *Src1);
1161  if (!Src1Imm->isIdenticalTo(*Src0Imm))
1162  return false;
1163  }
1164 
1165  int Src1ModIdx =
1166  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
1167  int Src0ModIdx =
1168  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
1169  if ((Src1ModIdx != -1 && MI.getOperand(Src1ModIdx).getImm() != 0) ||
1170  (Src0ModIdx != -1 && MI.getOperand(Src0ModIdx).getImm() != 0))
1171  return false;
1172 
1173  LLVM_DEBUG(dbgs() << "Folded " << MI << " into ");
1174  auto &NewDesc =
1175  TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
1176  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
1177  if (Src2Idx != -1)
1178  MI.RemoveOperand(Src2Idx);
1179  MI.RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
1180  if (Src1ModIdx != -1)
1181  MI.RemoveOperand(Src1ModIdx);
1182  if (Src0ModIdx != -1)
1183  MI.RemoveOperand(Src0ModIdx);
1184  mutateCopyOp(MI, NewDesc);
1185  LLVM_DEBUG(dbgs() << MI);
1186  return true;
1187 }
1188 
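// Fold OpToFold, the source of the foldable definition of MI's destination
// register, into every use of that register, then apply all collected fold
// candidates.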
1189 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
1190  MachineOperand &OpToFold) const {
1191  // We need to mutate the operands of new mov instructions to add implicit
1192  // uses of EXEC, but adding them invalidates the use_iterator, so defer
1193  // this.
1194  SmallVector<MachineInstr *, 4> CopiesToReplace;
1195  SmallVector<FoldCandidate, 4> FoldList;
1196  MachineOperand &Dst = MI.getOperand(0);
1197 
1198  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1199  if (FoldingImm) {
1200  unsigned NumLiteralUses = 0;
1201  MachineOperand *NonInlineUse = nullptr;
1202  int NonInlineUseOpNo = -1;
1203 
1204  bool Again;
1205  do {
1206  Again = false;
1207  for (auto &Use : make_early_inc_range(MRI->use_nodbg_operands(Dst.getReg()))) {
1208  MachineInstr *UseMI = Use.getParent();
1209  unsigned OpNo = UseMI->getOperandNo(&Use);
1210 
1211  // Folding the immediate may reveal operations that can be constant
1212  // folded or replaced with a copy. This can happen for example after
1213  // frame indices are lowered to constants or from splitting 64-bit
1214  // constants.
1215  //
1216  // We may also encounter cases where one or both operands are
1217  // immediates materialized into a register, which would ordinarily not
1218  // be folded due to multiple uses or operand constraints.
1219 
1220  if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
1221  LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI);
1222 
1223  // Some constant folding cases change the same immediate's use to a new
1224  // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
1225  // again. The same constant folded instruction could also have a second
1226  // use operand.
1227  FoldList.clear();
1228  Again = true;
1229  break;
1230  }
1231 
1232  // Try to fold any inline immediate uses, and then only fold other
1233  // constants if they have one use.
1234  //
1235  // The legality of the inline immediate must be checked based on the use
1236  // operand, not the defining instruction, because 32-bit instructions
1237  // with 32-bit inline immediate sources may be used to materialize
1238  // constants used in 16-bit operands.
1239  //
1240  // e.g. it is unsafe to fold:
1241  // s_mov_b32 s0, 1.0 // materializes 0x3f800000
1242  // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
1243 
1244  // Folding immediates with more than one use will increase program size.
1245  // FIXME: This will also reduce register usage, which may be better
1246  // in some cases. A better heuristic is needed.
1247  if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
1248  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
1249  } else if (frameIndexMayFold(TII, *UseMI, OpNo, OpToFold)) {
1250  foldOperand(OpToFold, UseMI, OpNo, FoldList,
1251  CopiesToReplace);
1252  } else {
1253  if (++NumLiteralUses == 1) {
1254  NonInlineUse = &Use;
1255  NonInlineUseOpNo = OpNo;
1256  }
1257  }
1258  }
1259  } while (Again);
1260 
1261  if (NumLiteralUses == 1) {
1262  MachineInstr *UseMI = NonInlineUse->getParent();
1263  foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
1264  }
1265  } else {
1266  // Folding register.
1267  SmallVector <MachineOperand *, 4> UsesToProcess;
1268  for (auto &Use : MRI->use_nodbg_operands(Dst.getReg()))
1269  UsesToProcess.push_back(&Use);
1270  for (auto U : UsesToProcess) {
1271  MachineInstr *UseMI = U->getParent();
1272 
1273  foldOperand(OpToFold, UseMI, UseMI->getOperandNo(U),
1274  FoldList, CopiesToReplace);
1275  }
1276  }
1277 
1278  MachineFunction *MF = MI.getParent()->getParent();
1279  // Make sure we add EXEC uses to any new v_mov instructions created.
1280  for (MachineInstr *Copy : CopiesToReplace)
1281  Copy->addImplicitDefUseOperands(*MF);
1282 
1283  for (FoldCandidate &Fold : FoldList) {
1284  assert(!Fold.isReg() || Fold.OpToFold);
1285  if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {
1286  Register Reg = Fold.OpToFold->getReg();
1287  MachineInstr *DefMI = Fold.OpToFold->getParent();
1288  if (DefMI->readsRegister(AMDGPU::EXEC, TRI) &&
1289  execMayBeModifiedBeforeUse(*MRI, Reg, *DefMI, *Fold.UseMI))
1290  continue;
1291  }
1292  if (updateOperand(Fold, *TII, *TRI, *ST)) {
1293  // Clear kill flags.
1294  if (Fold.isReg()) {
1295  assert(Fold.OpToFold && Fold.OpToFold->isReg());
1296  // FIXME: Probably shouldn't bother trying to fold if not an
1297  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
1298  // copies.
1299  MRI->clearKillFlags(Fold.OpToFold->getReg());
1300  }
1301  LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
1302  << static_cast<int>(Fold.UseOpNo) << " of "
1303  << *Fold.UseMI);
1304  } else if (Fold.isCommuted()) {
1305  // Restoring instruction's original operand order if fold has failed.
1306  TII->commuteInstruction(*Fold.UseMI, false);
1307  }
1308  }
1309 }
1310 
1311 // Clamp patterns are canonically selected to v_max_* instructions, so only
1312 // handle them.
1313 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
1314  unsigned Op = MI.getOpcode();
1315  switch (Op) {
1316  case AMDGPU::V_MAX_F32_e64:
1317  case AMDGPU::V_MAX_F16_e64:
1318  case AMDGPU::V_MAX_F64_e64:
1319  case AMDGPU::V_PK_MAX_F16: {
1320  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
1321  return nullptr;
1322 
1323  // Make sure sources are identical.
1324  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1325  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1326  if (!Src0->isReg() || !Src1->isReg() ||
1327  Src0->getReg() != Src1->getReg() ||
1328  Src0->getSubReg() != Src1->getSubReg() ||
1329  Src0->getSubReg() != AMDGPU::NoSubRegister)
1330  return nullptr;
1331 
1332  // Can't fold up if we have modifiers.
1333  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1334  return nullptr;
1335 
1336  unsigned Src0Mods
1337  = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
1338  unsigned Src1Mods
1339  = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
1340 
1341  // Having a 0 op_sel_hi would require swizzling the output in the source
1342  // instruction, which we can't do.
1343  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1
1344  : 0u;
1345  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
1346  return nullptr;
1347  return Src0;
1348  }
1349  default:
1350  return nullptr;
1351  }
1352 }
1353 
1354 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
1355 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
1356  const MachineOperand *ClampSrc = isClamp(MI);
1357  if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
1358  return false;
1359 
1360  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
1361 
1362  // The type of clamp must be compatible.
1363  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
1364  return false;
1365 
1366  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
1367  if (!DefClamp)
1368  return false;
1369 
1370  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def);
1371 
1372  // Clamp is applied after omod, so it is OK if omod is set.
1373  DefClamp->setImm(1);
1374  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1375  MI.eraseFromParent();
1376  return true;
1377 }
1378 
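// Translate a multiplication constant (0.5, 2.0 or 4.0 in the width matching
// the mul opcode) into the corresponding output modifier, or SIOutMods::NONE.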
1379 static int getOModValue(unsigned Opc, int64_t Val) {
1380  switch (Opc) {
1381  case AMDGPU::V_MUL_F64_e64: {
1382  switch (Val) {
1383  case 0x3fe0000000000000: // 0.5
1384  return SIOutMods::DIV2;
1385  case 0x4000000000000000: // 2.0
1386  return SIOutMods::MUL2;
1387  case 0x4010000000000000: // 4.0
1388  return SIOutMods::MUL4;
1389  default:
1390  return SIOutMods::NONE;
1391  }
1392  }
1393  case AMDGPU::V_MUL_F32_e64: {
1394  switch (static_cast<uint32_t>(Val)) {
1395  case 0x3f000000: // 0.5
1396  return SIOutMods::DIV2;
1397  case 0x40000000: // 2.0
1398  return SIOutMods::MUL2;
1399  case 0x40800000: // 4.0
1400  return SIOutMods::MUL4;
1401  default:
1402  return SIOutMods::NONE;
1403  }
1404  }
1405  case AMDGPU::V_MUL_F16_e64: {
1406  switch (static_cast<uint16_t>(Val)) {
1407  case 0x3800: // 0.5
1408  return SIOutMods::DIV2;
1409  case 0x4000: // 2.0
1410  return SIOutMods::MUL2;
1411  case 0x4400: // 4.0
1412  return SIOutMods::MUL4;
1413  default:
1414  return SIOutMods::NONE;
1415  }
1416  }
1417  default:
1418  llvm_unreachable("invalid mul opcode");
1419  }
1420 }
1421 
1422 // FIXME: Does this really not support denormals with f16?
1423 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
1424 // handled, so will anything other than that break?
1425 std::pair<const MachineOperand *, int>
1426 SIFoldOperands::isOMod(const MachineInstr &MI) const {
1427  unsigned Op = MI.getOpcode();
1428  switch (Op) {
1429  case AMDGPU::V_MUL_F64_e64:
1430  case AMDGPU::V_MUL_F32_e64:
1431  case AMDGPU::V_MUL_F16_e64: {
1432  // If output denormals are enabled, omod is ignored.
1433  if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32OutputDenormals) ||
1434  ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64) &&
1435  MFI->getMode().FP64FP16OutputDenormals))
1436  return std::make_pair(nullptr, SIOutMods::NONE);
1437 
1438  const MachineOperand *RegOp = nullptr;
1439  const MachineOperand *ImmOp = nullptr;
1440  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1441  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1442  if (Src0->isImm()) {
1443  ImmOp = Src0;
1444  RegOp = Src1;
1445  } else if (Src1->isImm()) {
1446  ImmOp = Src1;
1447  RegOp = Src0;
1448  } else
1449  return std::make_pair(nullptr, SIOutMods::NONE);
1450 
1451  int OMod = getOModValue(Op, ImmOp->getImm());
1452  if (OMod == SIOutMods::NONE ||
1453  TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
1454  TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
1455  TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
1456  TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
1457  return std::make_pair(nullptr, SIOutMods::NONE);
1458 
1459  return std::make_pair(RegOp, OMod);
1460  }
1461  case AMDGPU::V_ADD_F64_e64:
1462  case AMDGPU::V_ADD_F32_e64:
1463  case AMDGPU::V_ADD_F16_e64: {
1464  // If output denormals are enabled, omod is ignored.
1465  if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32OutputDenormals) ||
1466  ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64) &&
1467  MFI->getMode().FP64FP16OutputDenormals))
1468  return std::make_pair(nullptr, SIOutMods::NONE);
1469 
1470  // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
1471  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1472  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1473 
1474  if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
1475  Src0->getSubReg() == Src1->getSubReg() &&
1476  !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
1477  !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
1478  !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
1479  !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1480  return std::make_pair(Src0, SIOutMods::MUL2);
1481 
1482  return std::make_pair(nullptr, SIOutMods::NONE);
1483  }
1484  default:
1485  return std::make_pair(nullptr, SIOutMods::NONE);
1486  }
1487 }
1488 
1489 // FIXME: Does this need to check IEEE bit on function?
1490 bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1491  const MachineOperand *RegOp;
1492  int OMod;
1493  std::tie(RegOp, OMod) = isOMod(MI);
1494  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
1495  RegOp->getSubReg() != AMDGPU::NoSubRegister ||
1496  !MRI->hasOneNonDBGUser(RegOp->getReg()))
1497  return false;
1498 
1499  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
1500  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
1501  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
1502  return false;
1503 
1504  // Clamp is applied after omod. If the source already has clamp set, don't
1505  // fold it.
1506  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
1507  return false;
1508 
1509  LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def);
1510 
1511  DefOMod->setImm(OMod);
1512  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1513  MI.eraseFromParent();
1514  return true;
1515 }
1516 
1517 // Try to fold a reg_sequence with vgpr output and agpr inputs into an
1518 // instruction which can take an agpr. So far that means a store.
1519 bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
1520  assert(MI.isRegSequence());
1521  auto Reg = MI.getOperand(0).getReg();
1522 
1523  if (!ST->hasGFX90AInsts() || !TRI->isVGPR(*MRI, Reg) ||
1524  !MRI->hasOneNonDBGUse(Reg))
1525  return false;
1526 
1527  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
1528  if (!getRegSeqInit(Defs, Reg, MCOI::OPERAND_REGISTER, TII, *MRI))
1529  return false;
1530 
1531  for (auto &Def : Defs) {
1532  const auto *Op = Def.first;
1533  if (!Op->isReg())
1534  return false;
1535  if (TRI->isAGPR(*MRI, Op->getReg()))
1536  continue;
1537  // Maybe this is a COPY from AREG
1538  const MachineInstr *SubDef = MRI->getVRegDef(Op->getReg());
1539  if (!SubDef || !SubDef->isCopy() || SubDef->getOperand(1).getSubReg())
1540  return false;
1541  if (!TRI->isAGPR(*MRI, SubDef->getOperand(1).getReg()))
1542  return false;
1543  }
1544 
1545  MachineOperand *Op = &*MRI->use_nodbg_begin(Reg);
1546  MachineInstr *UseMI = Op->getParent();
1547  while (UseMI->isCopy() && !Op->getSubReg()) {
1548  Reg = UseMI->getOperand(0).getReg();
1549  if (!TRI->isVGPR(*MRI, Reg) || !MRI->hasOneNonDBGUse(Reg))
1550  return false;
1551  Op = &*MRI->use_nodbg_begin(Reg);
1552  UseMI = Op->getParent();
1553  }
1554 
1555  if (Op->getSubReg())
1556  return false;
1557 
1558  unsigned OpIdx = Op - &UseMI->getOperand(0);
1559  const MCInstrDesc &InstDesc = UseMI->getDesc();
1560  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
1561  switch (OpInfo.RegClass) {
1562  case AMDGPU::AV_32RegClassID: LLVM_FALLTHROUGH;
1563  case AMDGPU::AV_64RegClassID: LLVM_FALLTHROUGH;
1564  case AMDGPU::AV_96RegClassID: LLVM_FALLTHROUGH;
1565  case AMDGPU::AV_128RegClassID: LLVM_FALLTHROUGH;
1566  case AMDGPU::AV_160RegClassID:
1567  break;
1568  default:
1569  return false;
1570  }
1571 
1572  const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
1573  auto Dst = MRI->createVirtualRegister(NewDstRC);
1574  auto RS = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
1575  TII->get(AMDGPU::REG_SEQUENCE), Dst);
1576 
1577  for (unsigned I = 0; I < Defs.size(); ++I) {
1578  MachineOperand *Def = Defs[I].first;
1579  Def->setIsKill(false);
1580  if (TRI->isAGPR(*MRI, Def->getReg())) {
1581  RS.add(*Def);
1582  } else { // This is a copy
1583  MachineInstr *SubDef = MRI->getVRegDef(Def->getReg());
1584  SubDef->getOperand(1).setIsKill(false);
1585  RS.addReg(SubDef->getOperand(1).getReg(), 0, Def->getSubReg());
1586  }
1587  RS.addImm(Defs[I].second);
1588  }
1589 
1590  Op->setReg(Dst);
1591  if (!TII->isOperandLegal(*UseMI, OpIdx, Op)) {
1592  Op->setReg(Reg);
1593  RS->eraseFromParent();
1594  return false;
1595  }
1596 
1597  LLVM_DEBUG(dbgs() << "Folded " << *RS << " into " << *UseMI);
1598 
1599  return true;
1600 }
1601 
1602 // Try to hoist an AGPR to VGPR copy out of the loop across a LCSSA PHI.
1603 // This should allow folding of an AGPR into a consumer which may support it.
1604 // I.e.:
1605 //
1606 // loop:                              // loop:
1607 //   %1:vreg = COPY %0:areg           // exit:
1608 // exit:                          =>  //   %1:areg = PHI %0:areg, %loop
1609 //   %2:vreg = PHI %1:vreg, %loop     //   %2:vreg = COPY %1:areg
1610 bool SIFoldOperands::tryFoldLCSSAPhi(MachineInstr &PHI) {
1611  assert(PHI.isPHI());
1612 
1613  if (PHI.getNumExplicitOperands() != 3) // Single input LCSSA PHI
1614  return false;
1615 
1616  Register PhiIn = PHI.getOperand(1).getReg();
1617  Register PhiOut = PHI.getOperand(0).getReg();
1618  if (PHI.getOperand(1).getSubReg() ||
1619  !TRI->isVGPR(*MRI, PhiIn) || !TRI->isVGPR(*MRI, PhiOut))
1620  return false;
1621 
1622  // A single use should not matter for correctness, but if it has another use
1623  // inside the loop we may perform the copy twice in the worst case.
1624  if (!MRI->hasOneNonDBGUse(PhiIn))
1625  return false;
1626 
1627  MachineInstr *Copy = MRI->getVRegDef(PhiIn);
1628  if (!Copy || !Copy->isCopy())
1629  return false;
1630 
1631  Register CopyIn = Copy->getOperand(1).getReg();
1632  if (!TRI->isAGPR(*MRI, CopyIn) || Copy->getOperand(1).getSubReg())
1633  return false;
1634 
1635  const TargetRegisterClass *ARC = MRI->getRegClass(CopyIn);
1636  Register NewReg = MRI->createVirtualRegister(ARC);
1637  PHI.getOperand(1).setReg(CopyIn);
1638  PHI.getOperand(0).setReg(NewReg);
1639 
1640  MachineBasicBlock *MBB = PHI.getParent();
1641  BuildMI(*MBB, MBB->getFirstNonPHI(), Copy->getDebugLoc(),
1642  TII->get(AMDGPU::COPY), PhiOut)
1643  .addReg(NewReg, RegState::Kill);
1644  Copy->eraseFromParent(); // We know this copy had a single use.
1645 
1646  LLVM_DEBUG(dbgs() << "Folded " << PHI);
1647 
1648  return true;
1649 }
1650 
1651 // Attempt to convert VGPR load to an AGPR load.
1652 bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
1653  assert(MI.mayLoad());
1654  if (!ST->hasGFX90AInsts() || !MI.getNumOperands())
1655  return false;
1656 
1657  MachineOperand &Def = MI.getOperand(0);
1658  if (!Def.isDef())
1659  return false;
1660 
1661  Register DefReg = Def.getReg();
1662 
1663  if (DefReg.isPhysical() || !TRI->isVGPR(*MRI, DefReg))
1664  return false;
1665 
1666  SmallVector<const MachineInstr*, 8> Users;
1667  SmallVector<Register, 8> MoveRegs;
1668  for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg)) {
1669  Users.push_back(&I);
1670  }
1671  if (Users.empty())
1672  return false;
1673 
1674  // Check that all uses are copies to an agpr or a reg_sequence producing an agpr.
1675  while (!Users.empty()) {
1676  const MachineInstr *I = Users.pop_back_val();
1677  if (!I->isCopy() && !I->isRegSequence())
1678  return false;
1679  Register DstReg = I->getOperand(0).getReg();
1680  if (TRI->isAGPR(*MRI, DstReg))
1681  continue;
1682  MoveRegs.push_back(DstReg);
1683  for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg)) {
1684  Users.push_back(&U);
1685  }
1686  }
1687 
1688  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
1689  MRI->setRegClass(DefReg, TRI->getEquivalentAGPRClass(RC));
1690  if (!TII->isOperandLegal(MI, 0, &Def)) {
1691  MRI->setRegClass(DefReg, RC);
1692  return false;
1693  }
1694 
1695  while (!MoveRegs.empty()) {
1696  Register Reg = MoveRegs.pop_back_val();
1697  MRI->setRegClass(Reg, TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)));
1698  }
1699 
1700  LLVM_DEBUG(dbgs() << "Folded " << MI);
1701 
1702  return true;
1703 }
1704 
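// Visit blocks in depth-first order, folding the operands of foldable copies
// into their users, erasing redundant m0 writes, and trying the clamp and
// omod foldings on other instructions.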
1705 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
1706  if (skipFunction(MF.getFunction()))
1707  return false;
1708 
1709  MRI = &MF.getRegInfo();
1710  ST = &MF.getSubtarget<GCNSubtarget>();
1711  TII = ST->getInstrInfo();
1712  TRI = &TII->getRegisterInfo();
1713  MFI = MF.getInfo<SIMachineFunctionInfo>();
1714 
1715  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
1716  // correctly handle signed zeros.
1717  //
1718  // FIXME: Also need to check strictfp
1719  bool IsIEEEMode = MFI->getMode().IEEE;
1720  bool HasNSZ = MFI->hasNoSignedZerosFPMath();
1721 
1722  for (MachineBasicBlock *MBB : depth_first(&MF)) {
1723  MachineOperand *CurrentKnownM0Val = nullptr;
1724  for (auto &MI : make_early_inc_range(*MBB)) {
1725  tryFoldCndMask(MI);
1726 
1727  if (MI.isRegSequence() && tryFoldRegSequence(MI))
1728  continue;
1729 
1730  if (MI.isPHI() && tryFoldLCSSAPhi(MI))
1731  continue;
1732 
1733  if (MI.mayLoad() && tryFoldLoad(MI))
1734  continue;
1735 
1736  if (!TII->isFoldableCopy(MI)) {
1737  // Saw an unknown clobber of m0, so we no longer know what it is.
1738  if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
1739  CurrentKnownM0Val = nullptr;
1740 
1741  // TODO: Omod might be OK if there is NSZ only on the source
1742  // instruction, and not the omod multiply.
1743  if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1744  !tryFoldOMod(MI))
1745  tryFoldClamp(MI);
1746 
1747  continue;
1748  }
1749 
1750  // Specially track simple redefs of m0 to the same value in a block, so we
1751  // can erase the later ones.
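    // For example (an illustrative sequence), the second write below is
    // redundant and is erased, assuming nothing clobbers m0 in between:
    //   $m0 = S_MOV_B32 -1
    //   ...
    //   $m0 = S_MOV_B32 -1   ; erased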
1752  if (MI.getOperand(0).getReg() == AMDGPU::M0) {
1753  MachineOperand &NewM0Val = MI.getOperand(1);
1754  if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
1755  MI.eraseFromParent();
1756  continue;
1757  }
1758 
1759  // We aren't tracking other physical registers
1760  CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ?
1761  nullptr : &NewM0Val;
1762  continue;
1763  }
1764 
1765  MachineOperand &OpToFold = MI.getOperand(1);
1766  bool FoldingImm =
1767  OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1768 
1769  // FIXME: We could also be folding things like TargetIndexes.
1770  if (!FoldingImm && !OpToFold.isReg())
1771  continue;
1772 
1773  if (OpToFold.isReg() && !OpToFold.getReg().isVirtual())
1774  continue;
1775 
1776  // Prevent folding operands backwards in the function. For example,
1777  // the COPY opcode must not be replaced by 1 in this example:
1778  //
1779  // %3 = COPY %vgpr0; VGPR_32:%3
1780  // ...
1781  // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1782  if (!MI.getOperand(0).getReg().isVirtual())
1783  continue;
1784 
1785  foldInstOperand(MI, OpToFold);
1786 
1787  // If we managed to fold all uses of this copy then we might as well
1788  // delete it now.
1789  if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
1790  MI.eraseFromParentAndMarkDBGValuesForRemoval();
1791  }
1792  }
1793  return true;
1794 }