1 //===-- SIFoldOperands.cpp - Fold operands ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9 //
10 
11 #include "AMDGPU.h"
12 #include "GCNSubtarget.h"
14 #include "SIMachineFunctionInfo.h"
17 
18 #define DEBUG_TYPE "si-fold-operands"
19 using namespace llvm;
20 
21 namespace {
22 
23 struct FoldCandidate {
24  MachineInstr *UseMI;
25  union {
26  MachineOperand *OpToFold;
27  uint64_t ImmToFold;
28  int FrameIndexToFold;
29  };
30  int ShrinkOpcode;
31  unsigned UseOpNo;
32  MachineOperand::MachineOperandType Kind;
33  bool Commuted;
34 
35  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
36  bool Commuted_ = false,
37  int ShrinkOp = -1) :
38  UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
39  Kind(FoldOp->getType()),
40  Commuted(Commuted_) {
41  if (FoldOp->isImm()) {
42  ImmToFold = FoldOp->getImm();
43  } else if (FoldOp->isFI()) {
44  FrameIndexToFold = FoldOp->getIndex();
45  } else {
46  assert(FoldOp->isReg() || FoldOp->isGlobal());
47  OpToFold = FoldOp;
48  }
49  }
50 
51  bool isFI() const {
52  return Kind == MachineOperand::MO_FrameIndex;
53  }
54 
55  bool isImm() const {
56  return Kind == MachineOperand::MO_Immediate;
57  }
58 
59  bool isReg() const {
60  return Kind == MachineOperand::MO_Register;
61  }
62 
63  bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
64 
65  bool needsShrink() const { return ShrinkOpcode != -1; }
66 };
67 
68 class SIFoldOperands : public MachineFunctionPass {
69 public:
70  static char ID;
71  MachineRegisterInfo *MRI;
72  const SIInstrInfo *TII;
73  const SIRegisterInfo *TRI;
74  const GCNSubtarget *ST;
75  const SIMachineFunctionInfo *MFI;
76 
77  bool frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
78  const MachineOperand &OpToFold) const;
79 
80  bool updateOperand(FoldCandidate &Fold) const;
81 
82  bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
83  MachineInstr *MI, unsigned OpNo,
84  MachineOperand *OpToFold) const;
85  bool isUseSafeToFold(const MachineInstr &MI,
86  const MachineOperand &UseMO) const;
87  bool
88  getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
89  Register UseReg, uint8_t OpTy) const;
90  bool tryToFoldACImm(const MachineOperand &OpToFold, MachineInstr *UseMI,
91  unsigned UseOpIdx,
92  SmallVectorImpl<FoldCandidate> &FoldList) const;
93  void foldOperand(MachineOperand &OpToFold,
95  int UseOpIdx,
97  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
98 
99  MachineOperand *getImmOrMaterializedImm(MachineOperand &Op) const;
100  bool tryConstantFoldOp(MachineInstr *MI) const;
101  bool tryFoldCndMask(MachineInstr &MI) const;
102  bool tryFoldZeroHighBits(MachineInstr &MI) const;
103  bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
104  bool tryFoldFoldableCopy(MachineInstr &MI,
105  MachineOperand *&CurrentKnownM0Val) const;
106 
107  const MachineOperand *isClamp(const MachineInstr &MI) const;
108  bool tryFoldClamp(MachineInstr &MI);
109 
110  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
111  bool tryFoldOMod(MachineInstr &MI);
112  bool tryFoldRegSequence(MachineInstr &MI);
113  bool tryFoldLCSSAPhi(MachineInstr &MI);
114  bool tryFoldLoad(MachineInstr &MI);
115 
116 public:
117  SIFoldOperands() : MachineFunctionPass(ID) {
118  initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
119  }
120 
121  bool runOnMachineFunction(MachineFunction &MF) override;
122 
123  StringRef getPassName() const override { return "SI Fold Operands"; }
124 
125  void getAnalysisUsage(AnalysisUsage &AU) const override {
126  AU.setPreservesCFG();
128  }
129 };
130 
131 } // End anonymous namespace.
132 
133 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
134  "SI Fold Operands", false, false)
135 
136 char SIFoldOperands::ID = 0;
137 
138 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
139 
140 // Map multiply-accumulate opcode to corresponding multiply-add opcode if any.
141 static unsigned macToMad(unsigned Opc) {
142  switch (Opc) {
143  case AMDGPU::V_MAC_F32_e64:
144  return AMDGPU::V_MAD_F32_e64;
145  case AMDGPU::V_MAC_F16_e64:
146  return AMDGPU::V_MAD_F16_e64;
147  case AMDGPU::V_FMAC_F32_e64:
148  return AMDGPU::V_FMA_F32_e64;
149  case AMDGPU::V_FMAC_F16_e64:
150  return AMDGPU::V_FMA_F16_gfx9_e64;
151  case AMDGPU::V_FMAC_F16_t16_e64:
152  return AMDGPU::V_FMA_F16_gfx9_e64;
153  case AMDGPU::V_FMAC_LEGACY_F32_e64:
154  return AMDGPU::V_FMA_LEGACY_F32_e64;
155  case AMDGPU::V_FMAC_F64_e64:
156  return AMDGPU::V_FMA_F64_e64;
157  }
158  return AMDGPU::INSTRUCTION_LIST_END;
159 }
160 
161 // TODO: Add heuristic that the frame index might not fit in the addressing mode
162 // immediate offset to avoid materializing in loops.
163 bool SIFoldOperands::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
164  const MachineOperand &OpToFold) const {
165  if (!OpToFold.isFI())
166  return false;
167 
168  const unsigned Opc = UseMI.getOpcode();
169  if (TII->isMUBUF(UseMI))
170  return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
171  if (!TII->isFLATScratch(UseMI))
172  return false;
173 
174  int SIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
175  if (OpNo == SIdx)
176  return true;
177 
178  int VIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
179  return OpNo == VIdx && SIdx == -1;
180 }
181 
182 FunctionPass *llvm::createSIFoldOperandsPass() {
183  return new SIFoldOperands();
184 }
185 
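// Apply a FoldCandidate to its use instruction: an immediate may be rewritten
// through op_sel/op_sel_hi for packed operands, a 64-bit VOP3 add/sub may be
// shrunk to its 32-bit form, and register/frame-index/global operands are
// substituted in place.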
186 bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
187  MachineInstr *MI = Fold.UseMI;
188  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
189  assert(Old.isReg());
190 
191 
192  const uint64_t TSFlags = MI->getDesc().TSFlags;
193  if (Fold.isImm()) {
194  if (TSFlags & SIInstrFlags::IsPacked && !(TSFlags & SIInstrFlags::IsMAI) &&
195  (!ST->hasDOTOpSelHazard() || !(TSFlags & SIInstrFlags::IsDOT)) &&
196  AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
197  ST->hasInv2PiInlineImm())) {
198  // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
199  // already set.
200  unsigned Opcode = MI->getOpcode();
201  int OpNo = MI->getOperandNo(&Old);
202  int ModIdx = -1;
203  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
204  ModIdx = AMDGPU::OpName::src0_modifiers;
205  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
206  ModIdx = AMDGPU::OpName::src1_modifiers;
207  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
208  ModIdx = AMDGPU::OpName::src2_modifiers;
209  assert(ModIdx != -1);
210  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
211  MachineOperand &Mod = MI->getOperand(ModIdx);
212  unsigned Val = Mod.getImm();
213  if (!(Val & SISrcMods::OP_SEL_0) && (Val & SISrcMods::OP_SEL_1)) {
214  // Only apply the following transformation if that operand requires
215  // a packed immediate.
216  switch (TII->get(Opcode).OpInfo[OpNo].OperandType) {
221  // If upper part is all zero we do not need op_sel_hi.
222  if (!isUInt<16>(Fold.ImmToFold)) {
223  if (!(Fold.ImmToFold & 0xffff)) {
224  Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
225  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
226  Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
227  return true;
228  }
229  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
230  Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
231  return true;
232  }
233  break;
234  default:
235  break;
236  }
237  }
238  }
239  }
240 
241  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
242  MachineBasicBlock *MBB = MI->getParent();
243  auto Liveness = MBB->computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 16);
244  if (Liveness != MachineBasicBlock::LQR_Dead) {
245  LLVM_DEBUG(dbgs() << "Not shrinking " << MI << " due to vcc liveness\n");
246  return false;
247  }
248 
249  int Op32 = Fold.ShrinkOpcode;
250  MachineOperand &Dst0 = MI->getOperand(0);
251  MachineOperand &Dst1 = MI->getOperand(1);
252  assert(Dst0.isDef() && Dst1.isDef());
253 
254  bool HaveNonDbgCarryUse = !MRI->use_nodbg_empty(Dst1.getReg());
255 
256  const TargetRegisterClass *Dst0RC = MRI->getRegClass(Dst0.getReg());
257  Register NewReg0 = MRI->createVirtualRegister(Dst0RC);
258 
259  MachineInstr *Inst32 = TII->buildShrunkInst(*MI, Op32);
260 
261  if (HaveNonDbgCarryUse) {
262  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::COPY),
263  Dst1.getReg())
264  .addReg(AMDGPU::VCC, RegState::Kill);
265  }
266 
267  // Keep the old instruction around to avoid breaking iterators, but
268  // replace it with a dummy instruction to remove uses.
269  //
270  // FIXME: We should not invert how this pass looks at operands to avoid
271  // this. Should track set of foldable movs instead of looking for uses
272  // when looking at a use.
273  Dst0.setReg(NewReg0);
274  for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
275  MI->removeOperand(I);
276  MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF));
277 
278  if (Fold.Commuted)
279  TII->commuteInstruction(*Inst32, false);
280  return true;
281  }
282 
283  assert(!Fold.needsShrink() && "not handled");
284 
285  if (Fold.isImm()) {
286  if (Old.isTied()) {
287  int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(MI->getOpcode());
288  if (NewMFMAOpc == -1)
289  return false;
290  MI->setDesc(TII->get(NewMFMAOpc));
291  MI->untieRegOperand(0);
292  }
293  Old.ChangeToImmediate(Fold.ImmToFold);
294  return true;
295  }
296 
297  if (Fold.isGlobal()) {
298  Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
299  Fold.OpToFold->getTargetFlags());
300  return true;
301  }
302 
303  if (Fold.isFI()) {
304  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
305  return true;
306  }
307 
308  MachineOperand *New = Fold.OpToFold;
309  Old.substVirtReg(New->getReg(), New->getSubReg(), *TRI);
310  Old.setIsUndef(New->isUndef());
311  return true;
312 }
313 
314 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
315  const MachineInstr *MI) {
316  return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; });
317 }
318 
319 static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
320  MachineInstr *MI, unsigned OpNo,
321  MachineOperand *FoldOp, bool Commuted = false,
322  int ShrinkOp = -1) {
323  // Skip additional folding on the same operand.
324  for (FoldCandidate &Fold : FoldList)
325  if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)
326  return;
327  LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
328  << " operand " << OpNo << "\n " << *MI);
329  FoldList.emplace_back(MI, OpNo, FoldOp, Commuted, ShrinkOp);
330 }
331 
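// Try to legalize folding OpToFold into operand OpNo of MI and queue it on
// FoldList. If the operand is not legal as-is, this attempts a mac->mad
// conversion, the immediate form of s_setreg, or commuting the instruction.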
332 bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
333  MachineInstr *MI, unsigned OpNo,
334  MachineOperand *OpToFold) const {
335  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
336  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
337  unsigned Opc = MI->getOpcode();
338  unsigned NewOpc = macToMad(Opc);
339  if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
340  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
341  // to fold the operand.
342  MI->setDesc(TII->get(NewOpc));
343  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold);
344  if (FoldAsMAD) {
345  MI->untieRegOperand(OpNo);
346  return true;
347  }
348  MI->setDesc(TII->get(Opc));
349  }
350 
351  // Special case for s_setreg_b32
352  if (OpToFold->isImm()) {
353  unsigned ImmOpc = 0;
354  if (Opc == AMDGPU::S_SETREG_B32)
355  ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
356  else if (Opc == AMDGPU::S_SETREG_B32_mode)
357  ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
358  if (ImmOpc) {
359  MI->setDesc(TII->get(ImmOpc));
360  appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
361  return true;
362  }
363  }
364 
365  // If we are already folding into another operand of MI, then
366  // we can't commute the instruction, otherwise we risk making the
367  // other fold illegal.
368  if (isUseMIInFoldList(FoldList, MI))
369  return false;
370 
371  unsigned CommuteOpNo = OpNo;
372 
373  // Operand is not legal, so try to commute the instruction to
374  // see if this makes it possible to fold.
375  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
376  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
377  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
378 
379  if (CanCommute) {
380  if (CommuteIdx0 == OpNo)
381  CommuteOpNo = CommuteIdx1;
382  else if (CommuteIdx1 == OpNo)
383  CommuteOpNo = CommuteIdx0;
384  }
385 
386 
387  // One of the operands might be an Imm operand, and OpNo may refer to it after
388  // the call of commuteInstruction() below. Such situations are avoided
389  // here explicitly as OpNo must be a register operand to be a candidate
390  // for memory folding.
391  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
392  !MI->getOperand(CommuteIdx1).isReg()))
393  return false;
394 
395  if (!CanCommute ||
396  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
397  return false;
398 
399  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
400  if ((Opc == AMDGPU::V_ADD_CO_U32_e64 ||
401  Opc == AMDGPU::V_SUB_CO_U32_e64 ||
402  Opc == AMDGPU::V_SUBREV_CO_U32_e64) && // FIXME
403  (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
404 
405  // Verify the other operand is a VGPR, otherwise we would violate the
406  // constant bus restriction.
407  unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
408  MachineOperand &OtherOp = MI->getOperand(OtherIdx);
409  if (!OtherOp.isReg() ||
410  !TII->getRegisterInfo().isVGPR(*MRI, OtherOp.getReg()))
411  return false;
412 
413  assert(MI->getOperand(1).isDef());
414 
415  // Make sure to get the 32-bit version of the commuted opcode.
416  unsigned MaybeCommutedOpc = MI->getOpcode();
417  int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
418 
419  appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
420  return true;
421  }
422 
423  TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
424  return false;
425  }
426 
427  appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true);
428  return true;
429  }
430 
431  // Check the case where we might introduce a second constant operand to a
432  // scalar instruction
433  if (TII->isSALU(MI->getOpcode())) {
434  const MCInstrDesc &InstDesc = MI->getDesc();
435  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo];
436 
437  // Fine if the operand can be encoded as an inline constant
438  if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) {
439  // Otherwise check for another constant
440  for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
441  auto &Op = MI->getOperand(i);
442  if (OpNo != i && !Op.isReg() && !TII->isInlineConstant(Op, OpInfo))
443  return false;
444  }
445  }
446  }
447 
448  appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
449  return true;
450 }
451 
452 bool SIFoldOperands::isUseSafeToFold(const MachineInstr &MI,
453  const MachineOperand &UseMO) const {
454  // Operands of SDWA instructions must be registers.
455  return !TII->isSDWA(MI);
456 }
457 
458 // Find a def of the UseReg, check if it is a reg_sequence and find initializers
459 // for each subreg, tracing it back to a foldable inline immediate if possible.
460 // Returns true on success.
461 bool SIFoldOperands::getRegSeqInit(
462  SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
463  Register UseReg, uint8_t OpTy) const {
464  MachineInstr *Def = MRI->getVRegDef(UseReg);
465  if (!Def || !Def->isRegSequence())
466  return false;
467 
468  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
469  MachineOperand *Sub = &Def->getOperand(I);
470  assert(Sub->isReg());
471 
472  for (MachineInstr *SubDef = MRI->getVRegDef(Sub->getReg());
473  SubDef && Sub->isReg() && Sub->getReg().isVirtual() &&
474  !Sub->getSubReg() && TII->isFoldableCopy(*SubDef);
475  SubDef = MRI->getVRegDef(Sub->getReg())) {
476  MachineOperand *Op = &SubDef->getOperand(1);
477  if (Op->isImm()) {
478  if (TII->isInlineConstant(*Op, OpTy))
479  Sub = Op;
480  break;
481  }
482  if (!Op->isReg() || Op->getReg().isPhysical())
483  break;
484  Sub = Op;
485  }
486 
487  Defs.emplace_back(Sub, Def->getOperand(I + 1).getImm());
488  }
489 
490  return true;
491 }
492 
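// Try to fold an inline immediate (possibly materialized by a foldable copy,
// or provided as a reg_sequence of identical splat values) into an operand of
// UseMI that accepts inline constants (OPERAND_REG_INLINE_C_*/_AC_*).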
493 bool SIFoldOperands::tryToFoldACImm(
494  const MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
495  SmallVectorImpl<FoldCandidate> &FoldList) const {
496  const MCInstrDesc &Desc = UseMI->getDesc();
497  const MCOperandInfo *OpInfo = Desc.OpInfo;
498  if (!OpInfo || UseOpIdx >= Desc.getNumOperands())
499  return false;
500 
501  uint8_t OpTy = OpInfo[UseOpIdx].OperandType;
502  if ((OpTy < AMDGPU::OPERAND_REG_INLINE_AC_FIRST ||
503  OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST) &&
504  (OpTy < AMDGPU::OPERAND_REG_INLINE_C_FIRST ||
505  OpTy > AMDGPU::OPERAND_REG_INLINE_C_LAST))
506  return false;
507 
508  if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
509  TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
510  UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
511  return true;
512  }
513 
514  if (!OpToFold.isReg())
515  return false;
516 
517  Register UseReg = OpToFold.getReg();
518  if (!UseReg.isVirtual())
519  return false;
520 
521  if (isUseMIInFoldList(FoldList, UseMI))
522  return false;
523 
524  // Maybe it is just a COPY of an immediate itself.
525  MachineInstr *Def = MRI->getVRegDef(UseReg);
526  MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
527  if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
528  MachineOperand &DefOp = Def->getOperand(1);
529  if (DefOp.isImm() && TII->isInlineConstant(DefOp, OpTy) &&
530  TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
531  UseMI->getOperand(UseOpIdx).ChangeToImmediate(DefOp.getImm());
532  return true;
533  }
534  }
535 
536  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
537  if (!getRegSeqInit(Defs, UseReg, OpTy))
538  return false;
539 
540  int32_t Imm;
541  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
542  const MachineOperand *Op = Defs[I].first;
543  if (!Op->isImm())
544  return false;
545 
546  auto SubImm = Op->getImm();
547  if (!I) {
548  Imm = SubImm;
549  if (!TII->isInlineConstant(*Op, OpTy) ||
550  !TII->isOperandLegal(*UseMI, UseOpIdx, Op))
551  return false;
552 
553  continue;
554  }
555  if (Imm != SubImm)
556  return false; // Can only fold splat constants
557  }
558 
559  appendFoldCandidate(FoldList, UseMI, UseOpIdx, Defs[0].first);
560  return true;
561 }
562 
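// Fold OpToFold into the use at operand UseOpIdx of UseMI, either by rewriting
// the use in place (copies, readfirstlane/readlane, frame indexes) or by
// queueing a FoldCandidate on FoldList for updateOperand to apply later.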
563 void SIFoldOperands::foldOperand(
564  MachineOperand &OpToFold,
566  int UseOpIdx,
568  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
569  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
570 
571  if (!isUseSafeToFold(*UseMI, UseOp))
572  return;
573 
574  // FIXME: Fold operands with subregs.
575  if (UseOp.isReg() && OpToFold.isReg() &&
576  (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister))
577  return;
578 
579  // Special case for REG_SEQUENCE: We can't fold literals into
580  // REG_SEQUENCE instructions, so we have to fold them into the
581  // uses of REG_SEQUENCE.
582  if (UseMI->isRegSequence()) {
583  Register RegSeqDstReg = UseMI->getOperand(0).getReg();
584  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
585 
586  for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
587  MachineInstr *RSUseMI = RSUse.getParent();
588 
589  if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI,
590  RSUseMI->getOperandNo(&RSUse), FoldList))
591  continue;
592 
593  if (RSUse.getSubReg() != RegSeqDstSubReg)
594  continue;
595 
596  foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
597  CopiesToReplace);
598  }
599 
600  return;
601  }
602 
603  if (tryToFoldACImm(OpToFold, UseMI, UseOpIdx, FoldList))
604  return;
605 
606  if (frameIndexMayFold(*UseMI, UseOpIdx, OpToFold)) {
607  // Verify that this is a stack access.
608  // FIXME: Should probably use stack pseudos before frame lowering.
609 
610  if (TII->isMUBUF(*UseMI)) {
611  if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
612  MFI->getScratchRSrcReg())
613  return;
614 
615  // Ensure this is either relative to the current frame or the current
616  // wave.
617  MachineOperand &SOff =
618  *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
619  if (!SOff.isImm() || SOff.getImm() != 0)
620  return;
621  }
622 
623  // A frame index will resolve to a positive constant, so it should always be
624  // safe to fold the addressing mode, even pre-GFX9.
625  UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
626 
627  const unsigned Opc = UseMI->getOpcode();
628  if (TII->isFLATScratch(*UseMI) &&
629  AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr) &&
630  !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::saddr)) {
631  unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(Opc);
632  UseMI->setDesc(TII->get(NewOpc));
633  }
634 
635  return;
636  }
637 
638  bool FoldingImmLike =
639  OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
640 
641  if (FoldingImmLike && UseMI->isCopy()) {
642  Register DestReg = UseMI->getOperand(0).getReg();
643  Register SrcReg = UseMI->getOperand(1).getReg();
644  assert(SrcReg.isVirtual());
645 
646  const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
647 
648  // Don't fold into a copy to a physical register with the same class. Doing
649  // so would interfere with the register coalescer's logic which would avoid
650  // redundant initializations.
651  if (DestReg.isPhysical() && SrcRC->contains(DestReg))
652  return;
653 
654  const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
655  if (!DestReg.isPhysical()) {
656  if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
658  for (auto &Use : MRI->use_nodbg_operands(DestReg)) {
659  // There's no point trying to fold into an implicit operand.
660  if (Use.isImplicit())
661  continue;
662 
663  CopyUses.emplace_back(Use.getParent(),
664  Use.getParent()->getOperandNo(&Use),
665  &UseMI->getOperand(1));
666  }
667 
668  for (auto &F : CopyUses) {
669  foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList,
670  CopiesToReplace);
671  }
672  }
673 
674  if (DestRC == &AMDGPU::AGPR_32RegClass &&
675  TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
676  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
677  UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
678  CopiesToReplace.push_back(UseMI);
679  return;
680  }
681  }
682 
683  // In order to fold immediates into copies, we need to change the
684  // copy to a MOV.
685 
686  unsigned MovOp = TII->getMovOpcode(DestRC);
687  if (MovOp == AMDGPU::COPY)
688  return;
689 
690  UseMI->setDesc(TII->get(MovOp));
693  while (ImpOpI != ImpOpE) {
694  MachineInstr::mop_iterator Tmp = ImpOpI;
695  ImpOpI++;
697  }
698  CopiesToReplace.push_back(UseMI);
699  } else {
700  if (UseMI->isCopy() && OpToFold.isReg() &&
701  UseMI->getOperand(0).getReg().isVirtual() &&
702  !UseMI->getOperand(1).getSubReg()) {
703  LLVM_DEBUG(dbgs() << "Folding " << OpToFold << "\n into " << *UseMI);
704  unsigned Size = TII->getOpSize(*UseMI, 1);
705  Register UseReg = OpToFold.getReg();
706  UseMI->getOperand(1).setReg(UseReg);
707  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
708  UseMI->getOperand(1).setIsKill(false);
709  CopiesToReplace.push_back(UseMI);
710  OpToFold.setIsKill(false);
711 
712  // Remove kill flags as kills may now be out of order with uses.
713  MRI->clearKillFlags(OpToFold.getReg());
714 
715  // It is very tricky to store a value into an AGPR. v_accvgpr_write_b32
716  // can only accept VGPR or inline immediate. Recreate a reg_sequence with
717  // its initializers right here, so we will rematerialize immediates and
718  // avoid copies via different reg classes.
719  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
720  if (Size > 4 && TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
721  getRegSeqInit(Defs, UseReg, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
722  const DebugLoc &DL = UseMI->getDebugLoc();
723  MachineBasicBlock &MBB = *UseMI->getParent();
724 
725  UseMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE));
726  for (unsigned I = UseMI->getNumOperands() - 1; I > 0; --I)
727  UseMI->removeOperand(I);
728 
729  MachineInstrBuilder B(*MBB.getParent(), UseMI);
730  DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;
731  SmallSetVector<TargetInstrInfo::RegSubRegPair, 32> SeenAGPRs;
732  for (unsigned I = 0; I < Size / 4; ++I) {
733  MachineOperand *Def = Defs[I].first;
734  TargetInstrInfo::RegSubRegPair CopyToVGPR;
735  if (Def->isImm() &&
736  TII->isInlineConstant(*Def, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
737  int64_t Imm = Def->getImm();
738 
739  auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
740  BuildMI(MBB, UseMI, DL,
741  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addImm(Imm);
742  B.addReg(Tmp);
743  } else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
744  auto Src = getRegSubRegPair(*Def);
745  Def->setIsKill(false);
746  if (!SeenAGPRs.insert(Src)) {
747  // We cannot build a reg_sequence out of the same registers, they
748  // must be copied. Better do it here before copyPhysReg() created
749  // several reads to do the AGPR->VGPR->AGPR copy.
750  CopyToVGPR = Src;
751  } else {
752  B.addReg(Src.Reg, Def->isUndef() ? RegState::Undef : 0,
753  Src.SubReg);
754  }
755  } else {
756  assert(Def->isReg());
757  Def->setIsKill(false);
758  auto Src = getRegSubRegPair(*Def);
759 
760  // Direct copy from SGPR to AGPR is not possible. To avoid creation
761  // of exploded copies SGPR->VGPR->AGPR in the copyPhysReg() later,
762  // create a copy here and track if we already have such a copy.
763  if (TRI->isSGPRReg(*MRI, Src.Reg)) {
764  CopyToVGPR = Src;
765  } else {
766  auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
767  BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Tmp).add(*Def);
768  B.addReg(Tmp);
769  }
770  }
771 
772  if (CopyToVGPR.Reg) {
773  Register Vgpr;
774  if (VGPRCopies.count(CopyToVGPR)) {
775  Vgpr = VGPRCopies[CopyToVGPR];
776  } else {
777  Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
778  BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def);
779  VGPRCopies[CopyToVGPR] = Vgpr;
780  }
781  auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
782  BuildMI(MBB, UseMI, DL,
783  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addReg(Vgpr);
784  B.addReg(Tmp);
785  }
786 
787  B.addImm(Defs[I].second);
788  }
789  LLVM_DEBUG(dbgs() << "Folded " << *UseMI);
790  return;
791  }
792 
793  if (Size != 4)
794  return;
795 
796  Register Reg0 = UseMI->getOperand(0).getReg();
797  Register Reg1 = UseMI->getOperand(1).getReg();
798  if (TRI->isAGPR(*MRI, Reg0) && TRI->isVGPR(*MRI, Reg1))
799  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
800  else if (TRI->isVGPR(*MRI, Reg0) && TRI->isAGPR(*MRI, Reg1))
801  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64));
802  else if (ST->hasGFX90AInsts() && TRI->isAGPR(*MRI, Reg0) &&
803  TRI->isAGPR(*MRI, Reg1))
804  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_MOV_B32));
805  return;
806  }
807 
808  unsigned UseOpc = UseMI->getOpcode();
809  if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
810  (UseOpc == AMDGPU::V_READLANE_B32 &&
811  (int)UseOpIdx ==
812  AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
813  // %vgpr = V_MOV_B32 imm
814  // %sgpr = V_READFIRSTLANE_B32 %vgpr
815  // =>
816  // %sgpr = S_MOV_B32 imm
817  if (FoldingImmLike) {
818  if (execMayBeModifiedBeforeUse(*MRI,
819  UseMI->getOperand(UseOpIdx).getReg(),
820  *OpToFold.getParent(),
821  *UseMI))
822  return;
823 
824  UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
825 
826  if (OpToFold.isImm())
827  UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
828  else
829  UseMI->getOperand(1).ChangeToFrameIndex(OpToFold.getIndex());
830  UseMI->removeOperand(2); // Remove exec read (or src1 for readlane)
831  return;
832  }
833 
834  if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
837  *OpToFold.getParent(),
838  *UseMI))
839  return;
840 
841  // %vgpr = COPY %sgpr0
842  // %sgpr1 = V_READFIRSTLANE_B32 %vgpr
843  // =>
844  // %sgpr1 = COPY %sgpr0
845  UseMI->setDesc(TII->get(AMDGPU::COPY));
846  UseMI->getOperand(1).setReg(OpToFold.getReg());
847  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
848  UseMI->getOperand(1).setIsKill(false);
849  UseMI->removeOperand(2); // Remove exec read (or src1 for readlane)
850  return;
851  }
852  }
853 
854  const MCInstrDesc &UseDesc = UseMI->getDesc();
855 
856  // Don't fold into target independent nodes. Target independent opcodes
857  // don't have defined register classes.
858  if (UseDesc.isVariadic() ||
859  UseOp.isImplicit() ||
860  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
861  return;
862  }
863 
864  if (!FoldingImmLike) {
865  if (OpToFold.isReg() && ST->needsAlignedVGPRs()) {
866  // Don't fold if OpToFold doesn't hold an aligned register.
867  const TargetRegisterClass *RC =
868  TRI->getRegClassForReg(*MRI, OpToFold.getReg());
869  if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) {
870  unsigned SubReg = OpToFold.getSubReg();
871  if (const TargetRegisterClass *SubRC =
872  TRI->getSubRegisterClass(RC, SubReg))
873  RC = SubRC;
874  }
875 
876  if (!RC || !TRI->isProperlyAlignedRC(*RC))
877  return;
878  }
879 
880  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold);
881 
882  // FIXME: We could try to change the instruction from 64-bit to 32-bit
883  // to enable more folding opportunities. The shrink operands pass
884  // already does this.
885  return;
886  }
887 
888 
889  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
890  const TargetRegisterClass *FoldRC =
891  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
892 
893  // Split 64-bit constants into 32-bits for folding.
894  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
895  Register UseReg = UseOp.getReg();
896  const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
897 
898  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
899  return;
900 
901  APInt Imm(64, OpToFold.getImm());
902  if (UseOp.getSubReg() == AMDGPU::sub0) {
903  Imm = Imm.getLoBits(32);
904  } else {
905  assert(UseOp.getSubReg() == AMDGPU::sub1);
906  Imm = Imm.getHiBits(32);
907  }
908 
909  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
910  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp);
911  return;
912  }
913 
914  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold);
915 }
916 
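// Constant-fold a two-operand ALU opcode with known 32-bit inputs. Returns
// false for opcodes this table does not handle.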
917 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
919  switch (Opcode) {
920  case AMDGPU::V_AND_B32_e64:
921  case AMDGPU::V_AND_B32_e32:
922  case AMDGPU::S_AND_B32:
923  Result = LHS & RHS;
924  return true;
925  case AMDGPU::V_OR_B32_e64:
926  case AMDGPU::V_OR_B32_e32:
927  case AMDGPU::S_OR_B32:
928  Result = LHS | RHS;
929  return true;
930  case AMDGPU::V_XOR_B32_e64:
931  case AMDGPU::V_XOR_B32_e32:
932  case AMDGPU::S_XOR_B32:
933  Result = LHS ^ RHS;
934  return true;
935  case AMDGPU::S_XNOR_B32:
936  Result = ~(LHS ^ RHS);
937  return true;
938  case AMDGPU::S_NAND_B32:
939  Result = ~(LHS & RHS);
940  return true;
941  case AMDGPU::S_NOR_B32:
942  Result = ~(LHS | RHS);
943  return true;
944  case AMDGPU::S_ANDN2_B32:
945  Result = LHS & ~RHS;
946  return true;
947  case AMDGPU::S_ORN2_B32:
948  Result = LHS | ~RHS;
949  return true;
950  case AMDGPU::V_LSHL_B32_e64:
951  case AMDGPU::V_LSHL_B32_e32:
952  case AMDGPU::S_LSHL_B32:
953  // The instruction ignores the high bits for out of bounds shifts.
954  Result = LHS << (RHS & 31);
955  return true;
956  case AMDGPU::V_LSHLREV_B32_e64:
957  case AMDGPU::V_LSHLREV_B32_e32:
958  Result = RHS << (LHS & 31);
959  return true;
960  case AMDGPU::V_LSHR_B32_e64:
961  case AMDGPU::V_LSHR_B32_e32:
962  case AMDGPU::S_LSHR_B32:
963  Result = LHS >> (RHS & 31);
964  return true;
965  case AMDGPU::V_LSHRREV_B32_e64:
966  case AMDGPU::V_LSHRREV_B32_e32:
967  Result = RHS >> (LHS & 31);
968  return true;
969  case AMDGPU::V_ASHR_I32_e64:
970  case AMDGPU::V_ASHR_I32_e32:
971  case AMDGPU::S_ASHR_I32:
972  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
973  return true;
974  case AMDGPU::V_ASHRREV_I32_e64:
975  case AMDGPU::V_ASHRREV_I32_e32:
976  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
977  return true;
978  default:
979  return false;
980  }
981 }
982 
983 static unsigned getMovOpc(bool IsScalar) {
984  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
985 }
986 
987 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
988  MI.setDesc(NewDesc);
989 
990  // Remove any leftover implicit operands from mutating the instruction. e.g.
991  // if we replace an s_and_b32 with a copy, we don't need the implicit scc def
992  // anymore.
993  const MCInstrDesc &Desc = MI.getDesc();
994  unsigned NumOps = Desc.getNumOperands() +
995  Desc.getNumImplicitUses() +
996  Desc.getNumImplicitDefs();
997 
998  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
999  MI.removeOperand(I);
1000 }
1001 
1003 SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
1004  // If this has a subregister, it obviously is a register source.
1005  if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
1006  !Op.getReg().isVirtual())
1007  return &Op;
1008 
1009  MachineInstr *Def = MRI->getVRegDef(Op.getReg());
1010  if (Def && Def->isMoveImmediate()) {
1011  MachineOperand &ImmSrc = Def->getOperand(1);
1012  if (ImmSrc.isImm())
1013  return &ImmSrc;
1014  }
1015 
1016  return &Op;
1017 }
1018 
1019 // Try to simplify operations with a constant that may appear after instruction
1020 // selection.
1021 // TODO: See if a frame index with a fixed offset can fold.
1022 bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
1023  unsigned Opc = MI->getOpcode();
1024 
1025  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1026  if (Src0Idx == -1)
1027  return false;
1028  MachineOperand *Src0 = getImmOrMaterializedImm(MI->getOperand(Src0Idx));
1029 
1030  if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
1031  Opc == AMDGPU::S_NOT_B32) &&
1032  Src0->isImm()) {
1033  MI->getOperand(1).ChangeToImmediate(~Src0->getImm());
1034  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
1035  return true;
1036  }
1037 
1038  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1039  if (Src1Idx == -1)
1040  return false;
1041  MachineOperand *Src1 = getImmOrMaterializedImm(MI->getOperand(Src1Idx));
1042 
1043  if (!Src0->isImm() && !Src1->isImm())
1044  return false;
1045 
1046  // and k0, k1 -> v_mov_b32 (k0 & k1)
1047  // or k0, k1 -> v_mov_b32 (k0 | k1)
1048  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
1049  if (Src0->isImm() && Src1->isImm()) {
1050  int32_t NewImm;
1051  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
1052  return false;
1053 
1054  bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());
1055 
1056  // Be careful to change the right operand, src0 may belong to a different
1057  // instruction.
1058  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
1059  MI->removeOperand(Src1Idx);
1060  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
1061  return true;
1062  }
1063 
1064  if (!MI->isCommutable())
1065  return false;
1066 
1067  if (Src0->isImm() && !Src1->isImm()) {
1068  std::swap(Src0, Src1);
1069  std::swap(Src0Idx, Src1Idx);
1070  }
1071 
1072  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
1073  if (Opc == AMDGPU::V_OR_B32_e64 ||
1074  Opc == AMDGPU::V_OR_B32_e32 ||
1075  Opc == AMDGPU::S_OR_B32) {
1076  if (Src1Val == 0) {
1077  // y = or x, 0 => y = copy x
1078  MI->removeOperand(Src1Idx);
1079  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
1080  } else if (Src1Val == -1) {
1081  // y = or x, -1 => y = v_mov_b32 -1
1082  MI->removeOperand(Src1Idx);
1083  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
1084  } else
1085  return false;
1086 
1087  return true;
1088  }
1089 
1090  if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 ||
1091  Opc == AMDGPU::S_AND_B32) {
1092  if (Src1Val == 0) {
1093  // y = and x, 0 => y = v_mov_b32 0
1094  MI->removeOperand(Src0Idx);
1095  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
1096  } else if (Src1Val == -1) {
1097  // y = and x, -1 => y = copy x
1098  MI->removeOperand(Src1Idx);
1099  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
1100  } else
1101  return false;
1102 
1103  return true;
1104  }
1105 
1106  if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 ||
1107  Opc == AMDGPU::S_XOR_B32) {
1108  if (Src1Val == 0) {
1109  // y = xor x, 0 => y = copy x
1110  MI->removeOperand(Src1Idx);
1111  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
1112  return true;
1113  }
1114  }
1115 
1116  return false;
1117 }
1118 
1119 // Try to fold an instruction into a simpler one
1120 bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
1121  unsigned Opc = MI.getOpcode();
1122  if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
1123  Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
1124  return false;
1125 
1126  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1127  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1128  if (!Src1->isIdenticalTo(*Src0)) {
1129  auto *Src0Imm = getImmOrMaterializedImm(*Src0);
1130  auto *Src1Imm = getImmOrMaterializedImm(*Src1);
1131  if (!Src1Imm->isIdenticalTo(*Src0Imm))
1132  return false;
1133  }
1134 
1135  int Src1ModIdx =
1136  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
1137  int Src0ModIdx =
1138  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
1139  if ((Src1ModIdx != -1 && MI.getOperand(Src1ModIdx).getImm() != 0) ||
1140  (Src0ModIdx != -1 && MI.getOperand(Src0ModIdx).getImm() != 0))
1141  return false;
1142 
1143  LLVM_DEBUG(dbgs() << "Folded " << MI << " into ");
1144  auto &NewDesc =
1145  TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
1146  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
1147  if (Src2Idx != -1)
1148  MI.removeOperand(Src2Idx);
1149  MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
1150  if (Src1ModIdx != -1)
1151  MI.removeOperand(Src1ModIdx);
1152  if (Src0ModIdx != -1)
1153  MI.removeOperand(Src0ModIdx);
1154  mutateCopyOp(MI, NewDesc);
1155  LLVM_DEBUG(dbgs() << MI);
1156  return true;
1157 }
1158 
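// Fold away a v_and_b32 with 0xffff when the other operand is produced by an
// instruction that already zeroes the high 16 bits of its result.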
1159 bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
1160  if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
1161  MI.getOpcode() != AMDGPU::V_AND_B32_e32)
1162  return false;
1163 
1164  MachineOperand *Src0 = getImmOrMaterializedImm(MI.getOperand(1));
1165  if (!Src0->isImm() || Src0->getImm() != 0xffff)
1166  return false;
1167 
1168  Register Src1 = MI.getOperand(2).getReg();
1169  MachineInstr *SrcDef = MRI->getVRegDef(Src1);
1170  if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
1171  return false;
1172 
1173  Register Dst = MI.getOperand(0).getReg();
1174  MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg());
1175  MI.eraseFromParent();
1176  return true;
1177 }
1178 
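// Fold OpToFold, the source of a foldable copy/mov MI, into every use of MI's
// destination register, then apply the collected fold candidates.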
1179 bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
1180  MachineOperand &OpToFold) const {
1181  // We need to mutate the operands of new mov instructions to add implicit
1182  // uses of EXEC, but adding them invalidates the use_iterator, so defer
1183  // this.
1184  SmallVector<MachineInstr *, 4> CopiesToReplace;
1186  MachineOperand &Dst = MI.getOperand(0);
1187  bool Changed = false;
1188 
1189  if (OpToFold.isImm()) {
1190  for (auto &UseMI :
1192  // Folding the immediate may reveal operations that can be constant
1193  // folded or replaced with a copy. This can happen for example after
1194  // frame indices are lowered to constants or from splitting 64-bit
1195  // constants.
1196  //
1197  // We may also encounter cases where one or both operands are
1198  // immediates materialized into a register, which would ordinarily not
1199  // be folded due to multiple uses or operand constraints.
1200  if (tryConstantFoldOp(&UseMI)) {
1201  LLVM_DEBUG(dbgs() << "Constant folded " << UseMI);
1202  Changed = true;
1203  }
1204  }
1205  }
1206 
1207  SmallVector<MachineOperand *, 4> UsesToProcess;
1208  for (auto &Use : MRI->use_nodbg_operands(Dst.getReg()))
1209  UsesToProcess.push_back(&Use);
1210  for (auto *U : UsesToProcess) {
1211  MachineInstr *UseMI = U->getParent();
1212  foldOperand(OpToFold, UseMI, UseMI->getOperandNo(U), FoldList,
1213  CopiesToReplace);
1214  }
1215 
1216  if (CopiesToReplace.empty() && FoldList.empty())
1217  return Changed;
1218 
1219  MachineFunction *MF = MI.getParent()->getParent();
1220  // Make sure we add EXEC uses to any new v_mov instructions created.
1221  for (MachineInstr *Copy : CopiesToReplace)
1222  Copy->addImplicitDefUseOperands(*MF);
1223 
1224  for (FoldCandidate &Fold : FoldList) {
1225  assert(!Fold.isReg() || Fold.OpToFold);
1226  if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {
1227  Register Reg = Fold.OpToFold->getReg();
1228  MachineInstr *DefMI = Fold.OpToFold->getParent();
1229  if (DefMI->readsRegister(AMDGPU::EXEC, TRI) &&
1230  execMayBeModifiedBeforeUse(*MRI, Reg, *DefMI, *Fold.UseMI))
1231  continue;
1232  }
1233  if (updateOperand(Fold)) {
1234  // Clear kill flags.
1235  if (Fold.isReg()) {
1236  assert(Fold.OpToFold && Fold.OpToFold->isReg());
1237  // FIXME: Probably shouldn't bother trying to fold if not an
1238  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
1239  // copies.
1240  MRI->clearKillFlags(Fold.OpToFold->getReg());
1241  }
1242  LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
1243  << static_cast<int>(Fold.UseOpNo) << " of "
1244  << *Fold.UseMI);
1245  } else if (Fold.Commuted) {
1246  // Restoring instruction's original operand order if fold has failed.
1247  TII->commuteInstruction(*Fold.UseMI, false);
1248  }
1249  }
1250  return true;
1251 }
1252 
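// Handle a foldable copy/mov: track redundant redefinitions of m0, fold the
// source operand into the users of the destination, and erase the instruction
// (plus any trivially dead copy chain feeding it) once all uses are folded.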
1253 bool SIFoldOperands::tryFoldFoldableCopy(
1254  MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {
1255  // Specially track simple redefs of m0 to the same value in a block, so we
1256  // can erase the later ones.
1257  if (MI.getOperand(0).getReg() == AMDGPU::M0) {
1258  MachineOperand &NewM0Val = MI.getOperand(1);
1259  if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
1260  MI.eraseFromParent();
1261  return true;
1262  }
1263 
1264  // We aren't tracking other physical registers
1265  CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical())
1266  ? nullptr
1267  : &NewM0Val;
1268  return false;
1269  }
1270 
1271  MachineOperand &OpToFold = MI.getOperand(1);
1272  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1273 
1274  // FIXME: We could also be folding things like TargetIndexes.
1275  if (!FoldingImm && !OpToFold.isReg())
1276  return false;
1277 
1278  if (OpToFold.isReg() && !OpToFold.getReg().isVirtual())
1279  return false;
1280 
1281  // Prevent folding operands backwards in the function. For example,
1282  // the COPY opcode must not be replaced by 1 in this example:
1283  //
1284  // %3 = COPY %vgpr0; VGPR_32:%3
1285  // ...
1286  // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1287  if (!MI.getOperand(0).getReg().isVirtual())
1288  return false;
1289 
1290  bool Changed = foldInstOperand(MI, OpToFold);
1291 
1292  // If we managed to fold all uses of this copy then we might as well
1293  // delete it now.
1294  // The only reason we need to follow chains of copies here is that
1295  // tryFoldRegSequence looks forward through copies before folding a
1296  // REG_SEQUENCE into its eventual users.
1297  auto *InstToErase = &MI;
1298  while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
1299  auto &SrcOp = InstToErase->getOperand(1);
1300  auto SrcReg = SrcOp.isReg() ? SrcOp.getReg() : Register();
1301  InstToErase->eraseFromParent();
1302  Changed = true;
1303  InstToErase = nullptr;
1304  if (!SrcReg || SrcReg.isPhysical())
1305  break;
1306  InstToErase = MRI->getVRegDef(SrcReg);
1307  if (!InstToErase || !TII->isFoldableCopy(*InstToErase))
1308  break;
1309  }
1310 
1311  if (InstToErase && InstToErase->isRegSequence() &&
1312  MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
1313  InstToErase->eraseFromParent();
1314  Changed = true;
1315  }
1316 
1317  return Changed;
1318 }
1319 
1320 // Clamp patterns are canonically selected to v_max_* instructions, so only
1321 // handle them.
1322 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
1323  unsigned Op = MI.getOpcode();
1324  switch (Op) {
1325  case AMDGPU::V_MAX_F32_e64:
1326  case AMDGPU::V_MAX_F16_e64:
1327  case AMDGPU::V_MAX_F16_t16_e64:
1328  case AMDGPU::V_MAX_F64_e64:
1329  case AMDGPU::V_PK_MAX_F16: {
1330  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
1331  return nullptr;
1332 
1333  // Make sure sources are identical.
1334  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1335  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1336  if (!Src0->isReg() || !Src1->isReg() ||
1337  Src0->getReg() != Src1->getReg() ||
1338  Src0->getSubReg() != Src1->getSubReg() ||
1339  Src0->getSubReg() != AMDGPU::NoSubRegister)
1340  return nullptr;
1341 
1342  // Can't fold up if we have modifiers.
1343  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1344  return nullptr;
1345 
1346  unsigned Src0Mods
1347  = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
1348  unsigned Src1Mods
1349  = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
1350 
1351  // Having a 0 op_sel_hi would require swizzling the output in the source
1352  // instruction, which we can't do.
1353  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1
1354  : 0u;
1355  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
1356  return nullptr;
1357  return Src0;
1358  }
1359  default:
1360  return nullptr;
1361  }
1362 }
1363 
1364 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
1365 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
1366  const MachineOperand *ClampSrc = isClamp(MI);
1367  if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
1368  return false;
1369 
1370  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
1371 
1372  // The type of clamp must be compatible.
1373  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
1374  return false;
1375 
1376  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
1377  if (!DefClamp)
1378  return false;
1379 
1380  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def);
1381 
1382  // Clamp is applied after omod, so it is OK if omod is set.
1383  DefClamp->setImm(1);
1384  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1385  MI.eraseFromParent();
1386 
1387  // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
1388  // instruction, so we might as well convert it to the more flexible VOP3-only
1389  // mad/fma form.
1390  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
1391  Def->eraseFromParent();
1392 
1393  return true;
1394 }
1395 
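// Map a multiplication constant (0.5, 2.0 or 4.0 in the bit pattern of the
// given mul opcode's operand type) to the corresponding output-modifier
// encoding, or SIOutMods::NONE if it is not one of those values.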
1396 static int getOModValue(unsigned Opc, int64_t Val) {
1397  switch (Opc) {
1398  case AMDGPU::V_MUL_F64_e64: {
1399  switch (Val) {
1400  case 0x3fe0000000000000: // 0.5
1401  return SIOutMods::DIV2;
1402  case 0x4000000000000000: // 2.0
1403  return SIOutMods::MUL2;
1404  case 0x4010000000000000: // 4.0
1405  return SIOutMods::MUL4;
1406  default:
1407  return SIOutMods::NONE;
1408  }
1409  }
1410  case AMDGPU::V_MUL_F32_e64: {
1411  switch (static_cast<uint32_t>(Val)) {
1412  case 0x3f000000: // 0.5
1413  return SIOutMods::DIV2;
1414  case 0x40000000: // 2.0
1415  return SIOutMods::MUL2;
1416  case 0x40800000: // 4.0
1417  return SIOutMods::MUL4;
1418  default:
1419  return SIOutMods::NONE;
1420  }
1421  }
1422  case AMDGPU::V_MUL_F16_e64:
1423  case AMDGPU::V_MUL_F16_t16_e64: {
1424  switch (static_cast<uint16_t>(Val)) {
1425  case 0x3800: // 0.5
1426  return SIOutMods::DIV2;
1427  case 0x4000: // 2.0
1428  return SIOutMods::MUL2;
1429  case 0x4400: // 4.0
1430  return SIOutMods::MUL4;
1431  default:
1432  return SIOutMods::NONE;
1433  }
1434  }
1435  default:
1436  llvm_unreachable("invalid mul opcode");
1437  }
1438 }
1439 
1440 // FIXME: Does this really not support denormals with f16?
1441 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
1442 // handled, so will anything other than that break?
1443 std::pair<const MachineOperand *, int>
1444 SIFoldOperands::isOMod(const MachineInstr &MI) const {
1445  unsigned Op = MI.getOpcode();
1446  switch (Op) {
1447  case AMDGPU::V_MUL_F64_e64:
1448  case AMDGPU::V_MUL_F32_e64:
1449  case AMDGPU::V_MUL_F16_t16_e64:
1450  case AMDGPU::V_MUL_F16_e64: {
1451  // If output denormals are enabled, omod is ignored.
1452  if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32OutputDenormals) ||
1453  ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64 ||
1454  Op == AMDGPU::V_MUL_F16_t16_e64) &&
1455  MFI->getMode().FP64FP16OutputDenormals))
1456  return std::make_pair(nullptr, SIOutMods::NONE);
1457 
1458  const MachineOperand *RegOp = nullptr;
1459  const MachineOperand *ImmOp = nullptr;
1460  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1461  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1462  if (Src0->isImm()) {
1463  ImmOp = Src0;
1464  RegOp = Src1;
1465  } else if (Src1->isImm()) {
1466  ImmOp = Src1;
1467  RegOp = Src0;
1468  } else
1469  return std::make_pair(nullptr, SIOutMods::NONE);
1470 
1471  int OMod = getOModValue(Op, ImmOp->getImm());
1472  if (OMod == SIOutMods::NONE ||
1473  TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
1474  TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
1475  TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
1476  TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
1477  return std::make_pair(nullptr, SIOutMods::NONE);
1478 
1479  return std::make_pair(RegOp, OMod);
1480  }
1481  case AMDGPU::V_ADD_F64_e64:
1482  case AMDGPU::V_ADD_F32_e64:
1483  case AMDGPU::V_ADD_F16_e64:
1484  case AMDGPU::V_ADD_F16_t16_e64: {
1485  // If output denormals are enabled, omod is ignored.
1486  if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32OutputDenormals) ||
1487  ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 ||
1488  Op == AMDGPU::V_ADD_F16_t16_e64) &&
1489  MFI->getMode().FP64FP16OutputDenormals))
1490  return std::make_pair(nullptr, SIOutMods::NONE);
1491 
1492  // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
1493  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1494  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1495 
1496  if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
1497  Src0->getSubReg() == Src1->getSubReg() &&
1498  !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
1499  !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
1500  !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
1501  !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1502  return std::make_pair(Src0, SIOutMods::MUL2);
1503 
1504  return std::make_pair(nullptr, SIOutMods::NONE);
1505  }
1506  default:
1507  return std::make_pair(nullptr, SIOutMods::NONE);
1508  }
1509 }
1510 
1511 // FIXME: Does this need to check IEEE bit on function?
1512 bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1513  const MachineOperand *RegOp;
1514  int OMod;
1515  std::tie(RegOp, OMod) = isOMod(MI);
1516  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
1517  RegOp->getSubReg() != AMDGPU::NoSubRegister ||
1518  !MRI->hasOneNonDBGUser(RegOp->getReg()))
1519  return false;
1520 
1521  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
1522  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
1523  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
1524  return false;
1525 
1526  // Clamp is applied after omod. If the source already has clamp set, don't
1527  // fold it.
1528  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
1529  return false;
1530 
1531  LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def);
1532 
1533  DefOMod->setImm(OMod);
1534  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1535  MI.eraseFromParent();
1536 
1537  // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
1538  // instruction, so we might as well convert it to the more flexible VOP3-only
1539  // mad/fma form.
1540  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
1541  Def->eraseFromParent();
1542 
1543  return true;
1544 }
1545 
1546 // Try to fold a reg_sequence with vgpr output and agpr inputs into an
1547 // instruction which can take an agpr. So far that means a store.
1548 bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
1549  assert(MI.isRegSequence());
1550  auto Reg = MI.getOperand(0).getReg();
1551 
1552  if (!ST->hasGFX90AInsts() || !TRI->isVGPR(*MRI, Reg) ||
1553  !MRI->hasOneNonDBGUse(Reg))
1554  return false;
1555 
1556  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
1557  if (!getRegSeqInit(Defs, Reg, MCOI::OPERAND_REGISTER))
1558  return false;
1559 
1560  for (auto &Def : Defs) {
1561  const auto *Op = Def.first;
1562  if (!Op->isReg())
1563  return false;
1564  if (TRI->isAGPR(*MRI, Op->getReg()))
1565  continue;
1566  // Maybe this is a COPY from AREG
1567  const MachineInstr *SubDef = MRI->getVRegDef(Op->getReg());
1568  if (!SubDef || !SubDef->isCopy() || SubDef->getOperand(1).getSubReg())
1569  return false;
1570  if (!TRI->isAGPR(*MRI, SubDef->getOperand(1).getReg()))
1571  return false;
1572  }
1573 
1575  MachineInstr *UseMI = Op->getParent();
1576  while (UseMI->isCopy() && !Op->getSubReg()) {
1577  Reg = UseMI->getOperand(0).getReg();
1578  if (!TRI->isVGPR(*MRI, Reg) || !MRI->hasOneNonDBGUse(Reg))
1579  return false;
1580  Op = &*MRI->use_nodbg_begin(Reg);
1581  UseMI = Op->getParent();
1582  }
1583 
1584  if (Op->getSubReg())
1585  return false;
1586 
1587  unsigned OpIdx = Op - &UseMI->getOperand(0);
1588  const MCInstrDesc &InstDesc = UseMI->getDesc();
1589  const TargetRegisterClass *OpRC =
1590  TII->getRegClass(InstDesc, OpIdx, TRI, *MI.getMF());
1591  if (!OpRC || !TRI->isVectorSuperClass(OpRC))
1592  return false;
1593 
1594  const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
1595  auto Dst = MRI->createVirtualRegister(NewDstRC);
1596  auto RS = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
1597  TII->get(AMDGPU::REG_SEQUENCE), Dst);
1598 
1599  for (unsigned I = 0; I < Defs.size(); ++I) {
1600  MachineOperand *Def = Defs[I].first;
1601  Def->setIsKill(false);
1602  if (TRI->isAGPR(*MRI, Def->getReg())) {
1603  RS.add(*Def);
1604  } else { // This is a copy
1605  MachineInstr *SubDef = MRI->getVRegDef(Def->getReg());
1606  SubDef->getOperand(1).setIsKill(false);
1607  RS.addReg(SubDef->getOperand(1).getReg(), 0, Def->getSubReg());
1608  }
1609  RS.addImm(Defs[I].second);
1610  }
1611 
1612  Op->setReg(Dst);
1613  if (!TII->isOperandLegal(*UseMI, OpIdx, Op)) {
1614  Op->setReg(Reg);
1615  RS->eraseFromParent();
1616  return false;
1617  }
1618 
1619  LLVM_DEBUG(dbgs() << "Folded " << *RS << " into " << *UseMI);
1620 
1621  // Erase the REG_SEQUENCE eagerly, unless we followed a chain of COPY users,
1622  // in which case we can erase them all later in runOnMachineFunction.
1623  if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
1624  MI.eraseFromParent();
1625  return true;
1626 }
1627 
1628 // Try to hoist an AGPR to VGPR copy out of the loop across a LCSSA PHI.
1629 // This should allow folding of an AGPR into a consumer which may support it.
1630 // I.e.:
1631 //
1632 // loop: // loop:
1633 // %1:vreg = COPY %0:areg // exit:
1634 // exit: => // %1:areg = PHI %0:areg, %loop
1635 // %2:vreg = PHI %1:vreg, %loop // %2:vreg = COPY %1:areg
1636 bool SIFoldOperands::tryFoldLCSSAPhi(MachineInstr &PHI) {
1637  assert(PHI.isPHI());
1638 
1639  if (PHI.getNumExplicitOperands() != 3) // Single input LCSSA PHI
1640  return false;
1641 
1642  Register PhiIn = PHI.getOperand(1).getReg();
1643  Register PhiOut = PHI.getOperand(0).getReg();
1644  if (PHI.getOperand(1).getSubReg() ||
1645  !TRI->isVGPR(*MRI, PhiIn) || !TRI->isVGPR(*MRI, PhiOut))
1646  return false;
1647 
1648  // A single use should not matter for correctness, but if it has another use
1649  // inside the loop we may perform the copy twice in the worst case.
1650  if (!MRI->hasOneNonDBGUse(PhiIn))
1651  return false;
1652 
1653  MachineInstr *Copy = MRI->getVRegDef(PhiIn);
1654  if (!Copy || !Copy->isCopy())
1655  return false;
1656 
1657  Register CopyIn = Copy->getOperand(1).getReg();
1658  if (!TRI->isAGPR(*MRI, CopyIn) || Copy->getOperand(1).getSubReg())
1659  return false;
1660 
1661  const TargetRegisterClass *ARC = MRI->getRegClass(CopyIn);
1662  Register NewReg = MRI->createVirtualRegister(ARC);
1663  PHI.getOperand(1).setReg(CopyIn);
1664  PHI.getOperand(0).setReg(NewReg);
1665 
1666  MachineBasicBlock *MBB = PHI.getParent();
1667  BuildMI(*MBB, MBB->getFirstNonPHI(), Copy->getDebugLoc(),
1668  TII->get(AMDGPU::COPY), PhiOut)
1669  .addReg(NewReg, RegState::Kill);
1670  Copy->eraseFromParent(); // We know this copy had a single use.
1671 
1672  LLVM_DEBUG(dbgs() << "Folded " << PHI);
1673 
1674  return true;
1675 }
1676 
1677 // Attempt to convert VGPR load to an AGPR load.
1678 bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
1679  assert(MI.mayLoad());
1680  if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)
1681  return false;
1682 
1683  MachineOperand &Def = MI.getOperand(0);
1684  if (!Def.isDef())
1685  return false;
1686 
1687  Register DefReg = Def.getReg();
1688 
1689  if (DefReg.isPhysical() || !TRI->isVGPR(*MRI, DefReg))
1690  return false;
1691 
1692  SmallVector<const MachineInstr*, 8> Users;
1693  SmallVector<Register, 8> MoveRegs;
1694  for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg))
1695  Users.push_back(&I);
1696 
1697  if (Users.empty())
1698  return false;
1699 
1700  // Check that all uses are copies to an agpr or a reg_sequence producing an agpr.
1701  while (!Users.empty()) {
1702  const MachineInstr *I = Users.pop_back_val();
1703  if (!I->isCopy() && !I->isRegSequence())
1704  return false;
1705  Register DstReg = I->getOperand(0).getReg();
1706  if (TRI->isAGPR(*MRI, DstReg))
1707  continue;
1708  MoveRegs.push_back(DstReg);
1709  for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg))
1710  Users.push_back(&U);
1711  }
1712 
1713  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
1714  MRI->setRegClass(DefReg, TRI->getEquivalentAGPRClass(RC));
1715  if (!TII->isOperandLegal(MI, 0, &Def)) {
1716  MRI->setRegClass(DefReg, RC);
1717  return false;
1718  }
1719 
1720  while (!MoveRegs.empty()) {
1721  Register Reg = MoveRegs.pop_back_val();
1722  MRI->setRegClass(Reg, TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)));
1723  }
1724 
1725  LLVM_DEBUG(dbgs() << "Folded " << MI);
1726 
1727  return true;
1728 }
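A rough illustration of the pattern the function above handles, written as MIR-style comments; the opcode, address operand, and register classes are examples only, not taken from the source:

// Illustrative only:
// Before: the load result lives in a VGPR and is only copied into AGPRs.
//   %0:vreg_64 = GLOBAL_LOAD_DWORDX2 %addr, 0, 0
//   %1:areg_64 = COPY %0
// After: the load's destination is retyped to the equivalent AGPR class, and
// the intermediate copy/reg_sequence results are retyped as well.
//   %0:areg_64 = GLOBAL_LOAD_DWORDX2 %addr, 0, 0
//   %1:areg_64 = COPY %0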
1729 
1730 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
1731  if (skipFunction(MF.getFunction()))
1732  return false;
1733 
1734  MRI = &MF.getRegInfo();
1735  ST = &MF.getSubtarget<GCNSubtarget>();
1736  TII = ST->getInstrInfo();
1737  TRI = &TII->getRegisterInfo();
1738  MFI = MF.getInfo<SIMachineFunctionInfo>();
1739 
1740  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
1741  // correctly handle signed zeros.
1742  //
1743  // FIXME: Also need to check strictfp
1744  bool IsIEEEMode = MFI->getMode().IEEE;
1745  bool HasNSZ = MFI->hasNoSignedZerosFPMath();
1746 
1747  bool Changed = false;
1748  for (MachineBasicBlock *MBB : depth_first(&MF)) {
1749  MachineOperand *CurrentKnownM0Val = nullptr;
1750  for (auto &MI : make_early_inc_range(*MBB)) {
1751  Changed |= tryFoldCndMask(MI);
1752 
1753  if (tryFoldZeroHighBits(MI)) {
1754  Changed = true;
1755  continue;
1756  }
1757 
1758  if (MI.isRegSequence() && tryFoldRegSequence(MI)) {
1759  Changed = true;
1760  continue;
1761  }
1762 
1763  if (MI.isPHI() && tryFoldLCSSAPhi(MI)) {
1764  Changed = true;
1765  continue;
1766  }
1767 
1768  if (MI.mayLoad() && tryFoldLoad(MI)) {
1769  Changed = true;
1770  continue;
1771  }
1772 
1773  if (TII->isFoldableCopy(MI)) {
1774  Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);
1775  continue;
1776  }
1777 
1778  // Saw an unknown clobber of m0, so we no longer know what it is.
1779  if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
1780  CurrentKnownM0Val = nullptr;
1781 
1782  // TODO: Omod might be OK if there is NSZ only on the source
1783  // instruction, and not the omod multiply.
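 // In other words: only attempt the omod fold when IEEE mode is off and an
 // nsz guarantee is available (from the function or the instruction flag);
 // if no omod fold was performed, try to fold clamp instead.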
1784  if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1785  !tryFoldOMod(MI))
1786  Changed |= tryFoldClamp(MI);
1787  }
1788  }
1789 
1790  return Changed;
1791 }
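For orientation, a minimal sketch of where the AMDGPU backend typically schedules this pass in its legacy codegen pipeline. The hook name and surrounding code are paraphrased from AMDGPUTargetMachine.cpp and may differ between releases; this is not part of SIFoldOperands.cpp itself.

// Sketch (paraphrased): the GCN pass config adds SIFoldOperands after the
// target-independent machine SSA optimizations, so the peephole optimizer has
// already removed extra copies and the real source operands are visible.
void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();
  addPass(&SIFoldOperandsID);
}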