1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9 //
10 
11 #include "AMDGPU.h"
12 #include "GCNSubtarget.h"
14 #include "SIMachineFunctionInfo.h"
17 
18 #define DEBUG_TYPE "si-fold-operands"
19 using namespace llvm;
20 
21 namespace {
22 
23 struct FoldCandidate {
24  MachineInstr *UseMI;
25  union {
26  MachineOperand *OpToFold;
27  uint64_t ImmToFold;
28  int FrameIndexToFold;
29  };
30  int ShrinkOpcode;
31  unsigned UseOpNo;
32  MachineOperand::MachineOperandType Kind;
33  bool Commuted;
34 
35  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
36  bool Commuted_ = false,
37  int ShrinkOp = -1) :
38  UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
39  Kind(FoldOp->getType()),
40  Commuted(Commuted_) {
41  if (FoldOp->isImm()) {
42  ImmToFold = FoldOp->getImm();
43  } else if (FoldOp->isFI()) {
44  FrameIndexToFold = FoldOp->getIndex();
45  } else {
46  assert(FoldOp->isReg() || FoldOp->isGlobal());
47  OpToFold = FoldOp;
48  }
49  }
50 
51  bool isFI() const {
52  return Kind == MachineOperand::MO_FrameIndex;
53  }
54 
55  bool isImm() const {
56  return Kind == MachineOperand::MO_Immediate;
57  }
58 
59  bool isReg() const {
60  return Kind == MachineOperand::MO_Register;
61  }
62 
63  bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
64 
65  bool isCommuted() const {
66  return Commuted;
67  }
68 
69  bool needsShrink() const {
70  return ShrinkOpcode != -1;
71  }
72 
73  int getShrinkOpcode() const {
74  return ShrinkOpcode;
75  }
76 };
77 
78 class SIFoldOperands : public MachineFunctionPass {
79 public:
80  static char ID;
81  MachineRegisterInfo *MRI;
82  const SIInstrInfo *TII;
83  const SIRegisterInfo *TRI;
84  const GCNSubtarget *ST;
85  const SIMachineFunctionInfo *MFI;
86 
87  void foldOperand(MachineOperand &OpToFold,
88  MachineInstr *UseMI,
89  int UseOpIdx,
90  SmallVectorImpl<FoldCandidate> &FoldList,
91  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
92 
93  bool tryFoldCndMask(MachineInstr &MI) const;
94  bool tryFoldZeroHighBits(MachineInstr &MI) const;
95  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
96 
97  const MachineOperand *isClamp(const MachineInstr &MI) const;
98  bool tryFoldClamp(MachineInstr &MI);
99 
100  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
101  bool tryFoldOMod(MachineInstr &MI);
102  bool tryFoldRegSequence(MachineInstr &MI);
103  bool tryFoldLCSSAPhi(MachineInstr &MI);
104  bool tryFoldLoad(MachineInstr &MI);
105 
106 public:
107  SIFoldOperands() : MachineFunctionPass(ID) {
108  initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
109  }
110 
111  bool runOnMachineFunction(MachineFunction &MF) override;
112 
113  StringRef getPassName() const override { return "SI Fold Operands"; }
114 
115  void getAnalysisUsage(AnalysisUsage &AU) const override {
116  AU.setPreservesCFG();
117  MachineFunctionPass::getAnalysisUsage(AU);
118  }
119 };
120 
121 } // End anonymous namespace.
122 
123 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
124  "SI Fold Operands", false, false)
125 
126 char SIFoldOperands::ID = 0;
127 
128 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
129 
130 // Map multiply-accumulate opcode to corresponding multiply-add opcode if any.
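// For example, an immediate folded into the tied src2 of V_MAC_F32_e64 is only
// legal once the instruction is rewritten to V_MAD_F32_e64, which has an
// independent src2 operand.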
131 static unsigned macToMad(unsigned Opc) {
132  switch (Opc) {
133  case AMDGPU::V_MAC_F32_e64:
134  return AMDGPU::V_MAD_F32_e64;
135  case AMDGPU::V_MAC_F16_e64:
136  return AMDGPU::V_MAD_F16_e64;
137  case AMDGPU::V_FMAC_F32_e64:
138  return AMDGPU::V_FMA_F32_e64;
139  case AMDGPU::V_FMAC_F16_e64:
140  return AMDGPU::V_FMA_F16_gfx9_e64;
141  case AMDGPU::V_FMAC_LEGACY_F32_e64:
142  return AMDGPU::V_FMA_LEGACY_F32_e64;
143  case AMDGPU::V_FMAC_F64_e64:
144  return AMDGPU::V_FMA_F64_e64;
145  }
146  return AMDGPU::INSTRUCTION_LIST_END;
147 }
148 
149 // Wrapper around isInlineConstant that understands special cases when
150 // instruction types are replaced during operand folding.
151 static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
152  const MachineInstr &UseMI,
153  unsigned OpNo,
154  const MachineOperand &OpToFold) {
155  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
156  return true;
157 
158  unsigned Opc = UseMI.getOpcode();
159  unsigned NewOpc = macToMad(Opc);
160  if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
161  // Special case for mac. Since this is replaced with mad when folded into
162  // src2, we need to check the legality for the final instruction.
163  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
164  if (static_cast<int>(OpNo) == Src2Idx) {
165  const MCInstrDesc &MadDesc = TII->get(NewOpc);
166  return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
167  }
168  }
169 
170  return false;
171 }
172 
173 // TODO: Add heuristic that the frame index might not fit in the addressing mode
174 // immediate offset to avoid materializing in loops.
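// A foldable frame index can replace the vaddr operand of a MUBUF scratch
// access, or the saddr (or sole vaddr) operand of a FLAT scratch access, so
// the stack slot stays in the addressing mode instead of being materialized
// with a separate v_mov_b32.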
175 static bool frameIndexMayFold(const SIInstrInfo *TII,
176  const MachineInstr &UseMI,
177  int OpNo,
178  const MachineOperand &OpToFold) {
179  if (!OpToFold.isFI())
180  return false;
181 
182  if (TII->isMUBUF(UseMI))
183  return OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
184  AMDGPU::OpName::vaddr);
185  if (!TII->isFLATScratch(UseMI))
186  return false;
187 
188  int SIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
189  AMDGPU::OpName::saddr);
190  if (OpNo == SIdx)
191  return true;
192 
193  int VIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
194  AMDGPU::OpName::vaddr);
195  return OpNo == VIdx && SIdx == -1;
196 }
197 
198 FunctionPass *llvm::createSIFoldOperandsPass() {
199  return new SIFoldOperands();
200 }
201 
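// Apply a single FoldCandidate to its use operand: rewrite it to the folded
// immediate, frame index, global address or register, shrinking the user to
// its 32-bit encoding when the candidate requires it.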
202 static bool updateOperand(FoldCandidate &Fold,
203  const SIInstrInfo &TII,
204  const TargetRegisterInfo &TRI,
205  const GCNSubtarget &ST) {
206  MachineInstr *MI = Fold.UseMI;
207  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
208  assert(Old.isReg());
209 
210  if (Fold.isImm()) {
211  if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
212  !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) &&
213  AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
214  ST.hasInv2PiInlineImm())) {
215  // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
216  // already set.
217  unsigned Opcode = MI->getOpcode();
218  int OpNo = MI->getOperandNo(&Old);
219  int ModIdx = -1;
220  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
221  ModIdx = AMDGPU::OpName::src0_modifiers;
222  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
223  ModIdx = AMDGPU::OpName::src1_modifiers;
224  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
225  ModIdx = AMDGPU::OpName::src2_modifiers;
226  assert(ModIdx != -1);
227  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
228  MachineOperand &Mod = MI->getOperand(ModIdx);
229  unsigned Val = Mod.getImm();
230  if (!(Val & SISrcMods::OP_SEL_0) && (Val & SISrcMods::OP_SEL_1)) {
231  // Only apply the following transformation if that operand requires
232  // a packed immediate.
233  switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
234  case AMDGPU::OPERAND_REG_IMM_V2FP16:
235  case AMDGPU::OPERAND_REG_IMM_V2INT16:
236  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
237  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
238  // If upper part is all zero we do not need op_sel_hi.
239  if (!isUInt<16>(Fold.ImmToFold)) {
240  if (!(Fold.ImmToFold & 0xffff)) {
241  Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
242  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
243  Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
244  return true;
245  }
246  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
247  Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
248  return true;
249  }
250  break;
251  default:
252  break;
253  }
254  }
255  }
256  }
257 
258  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
259  MachineBasicBlock *MBB = MI->getParent();
260  auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI, 16);
261  if (Liveness != MachineBasicBlock::LQR_Dead) {
262  LLVM_DEBUG(dbgs() << "Not shrinking " << MI << " due to vcc liveness\n");
263  return false;
264  }
265 
266  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
267  int Op32 = Fold.getShrinkOpcode();
268  MachineOperand &Dst0 = MI->getOperand(0);
269  MachineOperand &Dst1 = MI->getOperand(1);
270  assert(Dst0.isDef() && Dst1.isDef());
271 
272  bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
273 
274  const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
275  Register NewReg0 = MRI.createVirtualRegister(Dst0RC);
276 
277  MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
278 
279  if (HaveNonDbgCarryUse) {
280  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
281  .addReg(AMDGPU::VCC, RegState::Kill);
282  }
283 
284  // Keep the old instruction around to avoid breaking iterators, but
285  // replace it with a dummy instruction to remove uses.
286  //
287  // FIXME: We should not invert how this pass looks at operands to avoid
288  // this. Should track set of foldable movs instead of looking for uses
289  // when looking at a use.
290  Dst0.setReg(NewReg0);
291  for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
292  MI->RemoveOperand(I);
293  MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
294 
295  if (Fold.isCommuted())
296  TII.commuteInstruction(*Inst32, false);
297  return true;
298  }
299 
300  assert(!Fold.needsShrink() && "not handled");
301 
302  if (Fold.isImm()) {
303  Old.ChangeToImmediate(Fold.ImmToFold);
304  return true;
305  }
306 
307  if (Fold.isGlobal()) {
308  Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
309  Fold.OpToFold->getTargetFlags());
310  return true;
311  }
312 
313  if (Fold.isFI()) {
314  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
315  return true;
316  }
317 
318  MachineOperand *New = Fold.OpToFold;
319  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
320  Old.setIsUndef(New->isUndef());
321  return true;
322 }
323 
324 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
325  const MachineInstr *MI) {
326  for (auto Candidate : FoldList) {
327  if (Candidate.UseMI == MI)
328  return true;
329  }
330  return false;
331 }
332 
333 static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
334  MachineInstr *MI, unsigned OpNo,
335  MachineOperand *FoldOp, bool Commuted = false,
336  int ShrinkOp = -1) {
337  // Skip additional folding on the same operand.
338  for (FoldCandidate &Fold : FoldList)
339  if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)
340  return;
341  LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
342  << " operand " << OpNo << "\n " << *MI);
343  FoldList.emplace_back(MI, OpNo, FoldOp, Commuted, ShrinkOp);
344 }
345 
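// Try to record a fold of OpToFold into operand OpNo of MI. If the operand is
// not legal as-is, this may rewrite MI to its mad/fma or s_setreg immediate
// form, or commute it, before appending the candidate.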
346 static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
347  MachineInstr *MI, unsigned OpNo,
348  MachineOperand *OpToFold,
349  const SIInstrInfo *TII) {
350  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
351  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
352  unsigned Opc = MI->getOpcode();
353  unsigned NewOpc = macToMad(Opc);
354  if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
355  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
356  // to fold the operand.
357  MI->setDesc(TII->get(NewOpc));
358  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
359  if (FoldAsMAD) {
360  MI->untieRegOperand(OpNo);
361  return true;
362  }
363  MI->setDesc(TII->get(Opc));
364  }
365 
366  // Special case for s_setreg_b32
367  if (OpToFold->isImm()) {
368  unsigned ImmOpc = 0;
369  if (Opc == AMDGPU::S_SETREG_B32)
370  ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
371  else if (Opc == AMDGPU::S_SETREG_B32_mode)
372  ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
373  if (ImmOpc) {
374  MI->setDesc(TII->get(ImmOpc));
375  appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
376  return true;
377  }
378  }
379 
380  // If we are already folding into another operand of MI, then
381  // we can't commute the instruction, otherwise we risk making the
382  // other fold illegal.
383  if (isUseMIInFoldList(FoldList, MI))
384  return false;
385 
386  unsigned CommuteOpNo = OpNo;
387 
388  // Operand is not legal, so try to commute the instruction to
389  // see if this makes it possible to fold.
390  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
391  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
392  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
393 
394  if (CanCommute) {
395  if (CommuteIdx0 == OpNo)
396  CommuteOpNo = CommuteIdx1;
397  else if (CommuteIdx1 == OpNo)
398  CommuteOpNo = CommuteIdx0;
399  }
400 
401 
402  // One of the operands might be an immediate, and OpNo may refer to it
403  // after the call to commuteInstruction() below. Such situations are
404  // avoided here explicitly, as OpNo must be a register operand to be a
405  // candidate for memory folding.
406  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
407  !MI->getOperand(CommuteIdx1).isReg()))
408  return false;
409 
410  if (!CanCommute ||
411  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
412  return false;
413 
414  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
415  if ((Opc == AMDGPU::V_ADD_CO_U32_e64 ||
416  Opc == AMDGPU::V_SUB_CO_U32_e64 ||
417  Opc == AMDGPU::V_SUBREV_CO_U32_e64) && // FIXME
418  (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
419  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
420 
421  // Verify the other operand is a VGPR, otherwise we would violate the
422  // constant bus restriction.
423  unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
424  MachineOperand &OtherOp = MI->getOperand(OtherIdx);
425  if (!OtherOp.isReg() ||
426  !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
427  return false;
428 
429  assert(MI->getOperand(1).isDef());
430 
431  // Make sure to get the 32-bit version of the commuted opcode.
432  unsigned MaybeCommutedOpc = MI->getOpcode();
433  int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
434 
435  appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
436  return true;
437  }
438 
439  TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
440  return false;
441  }
442 
443  appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true);
444  return true;
445  }
446 
447  // Check the case where we might introduce a second constant operand to a
448  // scalar instruction
449  if (TII->isSALU(MI->getOpcode())) {
450  const MCInstrDesc &InstDesc = MI->getDesc();
451  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo];
452  const SIRegisterInfo &SRI = TII->getRegisterInfo();
453 
454  // Fine if the operand can be encoded as an inline constant
455  if (TII->isLiteralConstantLike(*OpToFold, OpInfo)) {
456  if (!SRI.opCanUseInlineConstant(OpInfo.OperandType) ||
457  !TII->isInlineConstant(*OpToFold, OpInfo)) {
458  // Otherwise check for another constant
459  for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
460  auto &Op = MI->getOperand(i);
461  if (OpNo != i &&
462  TII->isLiteralConstantLike(Op, OpInfo)) {
463  return false;
464  }
465  }
466  }
467  }
468  }
469 
470  appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
471  return true;
472 }
473 
474 // If the use operand doesn't care about the value, this may be an operand only
475 // used for register indexing, in which case it is unsafe to fold.
476 static bool isUseSafeToFold(const SIInstrInfo *TII,
477  const MachineInstr &MI,
478  const MachineOperand &UseMO) {
479  if (UseMO.isUndef() || TII->isSDWA(MI))
480  return false;
481 
482  switch (MI.getOpcode()) {
483  case AMDGPU::V_MOV_B32_e32:
484  case AMDGPU::V_MOV_B32_e64:
485  case AMDGPU::V_MOV_B64_PSEUDO:
486  // Do not fold into an indirect mov.
487  return !MI.hasRegisterImplicitUseOperand(AMDGPU::M0);
488  }
489 
490  return true;
491  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
492 }
493 
494 // Find a def of the UseReg, check if it is a reg_sequence and find initializers
495 // for each subreg, tracking it to foldable inline immediate if possible.
496 // Returns true on success.
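// For example, given
//   %1:vgpr_32 = V_MOV_B32_e32 1
//   %2:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1
// Defs receives the V_MOV's immediate operand paired with each subregister
// index.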
497 static bool getRegSeqInit(
498  SmallVectorImpl<std::pair<MachineOperand*, unsigned>> &Defs,
499  Register UseReg, uint8_t OpTy,
500  const SIInstrInfo *TII, const MachineRegisterInfo &MRI) {
501  MachineInstr *Def = MRI.getVRegDef(UseReg);
502  if (!Def || !Def->isRegSequence())
503  return false;
504 
505  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
506  MachineOperand *Sub = &Def->getOperand(I);
507  assert(Sub->isReg());
508 
509  for (MachineInstr *SubDef = MRI.getVRegDef(Sub->getReg());
510  SubDef && Sub->isReg() && Sub->getReg().isVirtual() &&
511  !Sub->getSubReg() && TII->isFoldableCopy(*SubDef);
512  SubDef = MRI.getVRegDef(Sub->getReg())) {
513  MachineOperand *Op = &SubDef->getOperand(1);
514  if (Op->isImm()) {
515  if (TII->isInlineConstant(*Op, OpTy))
516  Sub = Op;
517  break;
518  }
519  if (!Op->isReg() || Op->getReg().isPhysical())
520  break;
521  Sub = Op;
522  }
523 
524  Defs.emplace_back(Sub, Def->getOperand(I + 1).getImm());
525  }
526 
527  return true;
528 }
529 
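// Try to fold OpToFold directly into operand UseOpIdx of UseMI when that
// operand type accepts an inline constant: either OpToFold is already a
// suitable immediate, or it is a register that can be traced through copies
// or a splat reg_sequence to one.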
530 static bool tryToFoldACImm(const SIInstrInfo *TII,
531  const MachineOperand &OpToFold,
532  MachineInstr *UseMI,
533  unsigned UseOpIdx,
534  SmallVectorImpl<FoldCandidate> &FoldList) {
535  const MCInstrDesc &Desc = UseMI->getDesc();
536  const MCOperandInfo *OpInfo = Desc.OpInfo;
537  if (!OpInfo || UseOpIdx >= Desc.getNumOperands())
538  return false;
539 
540  uint8_t OpTy = OpInfo[UseOpIdx].OperandType;
541  if ((OpTy < AMDGPU::OPERAND_REG_INLINE_AC_FIRST ||
542  OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST) &&
543  (OpTy < AMDGPU::OPERAND_REG_INLINE_C_FIRST ||
544  OpTy > AMDGPU::OPERAND_REG_INLINE_C_LAST))
545  return false;
546 
547  if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
548  TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
549  UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
550  return true;
551  }
552 
553  if (!OpToFold.isReg())
554  return false;
555 
556  Register UseReg = OpToFold.getReg();
557  if (!UseReg.isVirtual())
558  return false;
559 
560  if (isUseMIInFoldList(FoldList, UseMI))
561  return false;
562 
563  MachineRegisterInfo &MRI = UseMI->getParent()->getParent()->getRegInfo();
564 
565  // Maybe it is just a COPY of an immediate itself.
566  MachineInstr *Def = MRI.getVRegDef(UseReg);
567  MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
568  if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
569  MachineOperand &DefOp = Def->getOperand(1);
570  if (DefOp.isImm() && TII->isInlineConstant(DefOp, OpTy) &&
571  TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
572  UseMI->getOperand(UseOpIdx).ChangeToImmediate(DefOp.getImm());
573  return true;
574  }
575  }
576 
577  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
578  if (!getRegSeqInit(Defs, UseReg, OpTy, TII, MRI))
579  return false;
580 
581  int32_t Imm;
582  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
583  const MachineOperand *Op = Defs[I].first;
584  if (!Op->isImm())
585  return false;
586 
587  auto SubImm = Op->getImm();
588  if (!I) {
589  Imm = SubImm;
590  if (!TII->isInlineConstant(*Op, OpTy) ||
591  !TII->isOperandLegal(*UseMI, UseOpIdx, Op))
592  return false;
593 
594  continue;
595  }
596  if (Imm != SubImm)
597  return false; // Can only fold splat constants
598  }
599 
600  appendFoldCandidate(FoldList, UseMI, UseOpIdx, Defs[0].first);
601  return true;
602 }
603 
604 void SIFoldOperands::foldOperand(
605  MachineOperand &OpToFold,
606  MachineInstr *UseMI,
607  int UseOpIdx,
608  SmallVectorImpl<FoldCandidate> &FoldList,
609  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
610  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
611 
612  if (!isUseSafeToFold(TII, *UseMI, UseOp))
613  return;
614 
615  // FIXME: Fold operands with subregs.
616  if (UseOp.isReg() && OpToFold.isReg()) {
617  if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
618  return;
619  }
620 
621  // Special case for REG_SEQUENCE: We can't fold literals into
622  // REG_SEQUENCE instructions, so we have to fold them into the
623  // uses of REG_SEQUENCE.
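  // For example, a literal feeding
  //   %2:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1
  // is folded into the instructions that read %2, not into the REG_SEQUENCE
  // itself.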
624  if (UseMI->isRegSequence()) {
625  Register RegSeqDstReg = UseMI->getOperand(0).getReg();
626  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
627 
628  for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
629  MachineInstr *RSUseMI = RSUse.getParent();
630 
631  if (tryToFoldACImm(TII, UseMI->getOperand(0), RSUseMI,
632  RSUseMI->getOperandNo(&RSUse), FoldList))
633  continue;
634 
635  if (RSUse.getSubReg() != RegSeqDstSubReg)
636  continue;
637 
638  foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
639  CopiesToReplace);
640  }
641 
642  return;
643  }
644 
645  if (tryToFoldACImm(TII, OpToFold, UseMI, UseOpIdx, FoldList))
646  return;
647 
648  if (frameIndexMayFold(TII, *UseMI, UseOpIdx, OpToFold)) {
649  // Sanity check that this is a stack access.
650  // FIXME: Should probably use stack pseudos before frame lowering.
651 
652  if (TII->isMUBUF(*UseMI)) {
653  if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
654  MFI->getScratchRSrcReg())
655  return;
656 
657  // Ensure this is either relative to the current frame or the current
658  // wave.
659  MachineOperand &SOff =
660  *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
661  if (!SOff.isImm() || SOff.getImm() != 0)
662  return;
663  }
664 
665  // A frame index will resolve to a positive constant, so it should always be
666  // safe to fold the addressing mode, even pre-GFX9.
667  UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
668 
669  if (TII->isFLATScratch(*UseMI) &&
670  AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
671  AMDGPU::OpName::vaddr) != -1) {
672  unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode());
673  UseMI->setDesc(TII->get(NewOpc));
674  }
675 
676  return;
677  }
678 
679  bool FoldingImmLike =
680  OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
681 
682  if (FoldingImmLike && UseMI->isCopy()) {
683  Register DestReg = UseMI->getOperand(0).getReg();
684  Register SrcReg = UseMI->getOperand(1).getReg();
685  assert(SrcReg.isVirtual());
686 
687  const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
688 
689  // Don't fold into a copy to a physical register with the same class. Doing
690  // so would interfere with the register coalescer's logic which would avoid
691  // redundant initializations.
692  if (DestReg.isPhysical() && SrcRC->contains(DestReg))
693  return;
694 
695  const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
696  if (!DestReg.isPhysical()) {
697  if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
698  SmallVector<FoldCandidate, 4> CopyUses;
699  for (auto &Use : MRI->use_nodbg_operands(DestReg)) {
700  // There's no point trying to fold into an implicit operand.
701  if (Use.isImplicit())
702  continue;
703 
704  CopyUses.emplace_back(Use.getParent(),
705  Use.getParent()->getOperandNo(&Use),
706  &UseMI->getOperand(1));
707  }
708  for (auto &F : CopyUses) {
709  foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace);
710  }
711  }
712 
713  if (DestRC == &AMDGPU::AGPR_32RegClass &&
714  TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
715  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
716  UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
717  CopiesToReplace.push_back(UseMI);
718  return;
719  }
720  }
721 
722  // In order to fold immediates into copies, we need to change the
723  // copy to a MOV.
724 
725  unsigned MovOp = TII->getMovOpcode(DestRC);
726  if (MovOp == AMDGPU::COPY)
727  return;
728 
729  UseMI->setDesc(TII->get(MovOp));
730  MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
731  MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
732  while (ImpOpI != ImpOpE) {
733  MachineInstr::mop_iterator Tmp = ImpOpI;
734  ImpOpI++;
735  UseMI->RemoveOperand(UseMI->getOperandNo(Tmp));
736  }
737  CopiesToReplace.push_back(UseMI);
738  } else {
739  if (UseMI->isCopy() && OpToFold.isReg() &&
740  UseMI->getOperand(0).getReg().isVirtual() &&
741  !UseMI->getOperand(1).getSubReg()) {
742  LLVM_DEBUG(dbgs() << "Folding " << OpToFold << "\n into " << *UseMI);
743  unsigned Size = TII->getOpSize(*UseMI, 1);
744  Register UseReg = OpToFold.getReg();
745  UseMI->getOperand(1).setReg(UseReg);
746  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
747  UseMI->getOperand(1).setIsKill(false);
748  CopiesToReplace.push_back(UseMI);
749  OpToFold.setIsKill(false);
750 
751  // It is very tricky to store a value into an AGPR. v_accvgpr_write_b32
752  // can only accept VGPR or inline immediate. Recreate a reg_sequence with
753  // its initializers right here, so we will rematerialize immediates and
754  // avoid copies via different reg classes.
755  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
756  if (Size > 4 && TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
757  getRegSeqInit(Defs, UseReg, AMDGPU::OPERAND_REG_INLINE_C_INT32, TII,
758  *MRI)) {
759  const DebugLoc &DL = UseMI->getDebugLoc();
760  MachineBasicBlock &MBB = *UseMI->getParent();
761 
762  UseMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE));
763  for (unsigned I = UseMI->getNumOperands() - 1; I > 0; --I)
764  UseMI->RemoveOperand(I);
765 
766  MachineInstrBuilder B(*MBB.getParent(), UseMI);
767  DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;
768  SmallSetVector<TargetInstrInfo::RegSubRegPair, 32> SeenAGPRs;
769  for (unsigned I = 0; I < Size / 4; ++I) {
770  MachineOperand *Def = Defs[I].first;
771  TargetInstrInfo::RegSubRegPair CopyToVGPR;
772  if (Def->isImm() &&
773  TII->isInlineConstant(*Def, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
774  int64_t Imm = Def->getImm();
775 
776  auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
777  BuildMI(MBB, UseMI, DL,
778  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addImm(Imm);
779  B.addReg(Tmp);
780  } else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
781  auto Src = getRegSubRegPair(*Def);
782  Def->setIsKill(false);
783  if (!SeenAGPRs.insert(Src)) {
784  // We cannot build a reg_sequence out of the same registers, they
785  // must be copied. Better do it here before copyPhysReg() created
786  // several reads to do the AGPR->VGPR->AGPR copy.
787  CopyToVGPR = Src;
788  } else {
789  B.addReg(Src.Reg, Def->isUndef() ? RegState::Undef : 0,
790  Src.SubReg);
791  }
792  } else {
793  assert(Def->isReg());
794  Def->setIsKill(false);
795  auto Src = getRegSubRegPair(*Def);
796 
797  // Direct copy from SGPR to AGPR is not possible. To avoid creation
798  // of exploded copies SGPR->VGPR->AGPR in the copyPhysReg() later,
799  // create a copy here and track if we already have such a copy.
800  if (TRI->isSGPRReg(*MRI, Src.Reg)) {
801  CopyToVGPR = Src;
802  } else {
803  auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
804  BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Tmp).add(*Def);
805  B.addReg(Tmp);
806  }
807  }
808 
809  if (CopyToVGPR.Reg) {
810  Register Vgpr;
811  if (VGPRCopies.count(CopyToVGPR)) {
812  Vgpr = VGPRCopies[CopyToVGPR];
813  } else {
814  Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
815  BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def);
816  VGPRCopies[CopyToVGPR] = Vgpr;
817  }
818  auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
819  BuildMI(MBB, UseMI, DL,
820  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addReg(Vgpr);
821  B.addReg(Tmp);
822  }
823 
824  B.addImm(Defs[I].second);
825  }
826  LLVM_DEBUG(dbgs() << "Folded " << *UseMI);
827  return;
828  }
829 
830  if (Size != 4)
831  return;
832  if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
833  TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()))
834  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
835  else if (TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
836  TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
837  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64));
838  else if (ST->hasGFX90AInsts() &&
839  TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
840  TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
841  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_MOV_B32));
842  return;
843  }
844 
845  unsigned UseOpc = UseMI->getOpcode();
846  if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
847  (UseOpc == AMDGPU::V_READLANE_B32 &&
848  (int)UseOpIdx ==
849  AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
850  // %vgpr = V_MOV_B32 imm
851  // %sgpr = V_READFIRSTLANE_B32 %vgpr
852  // =>
853  // %sgpr = S_MOV_B32 imm
854  if (FoldingImmLike) {
855  if (execMayBeModifiedBeforeUse(*MRI,
856  UseMI->getOperand(UseOpIdx).getReg(),
857  *OpToFold.getParent(),
858  *UseMI))
859  return;
860 
861  UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
862 
863  if (OpToFold.isImm())
864  UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
865  else
866  UseMI->getOperand(1).ChangeToFrameIndex(OpToFold.getIndex());
867  UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
868  return;
869  }
870 
871  if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
872  if (execMayBeModifiedBeforeUse(*MRI,
873  UseMI->getOperand(UseOpIdx).getReg(),
874  *OpToFold.getParent(),
875  *UseMI))
876  return;
877 
878  // %vgpr = COPY %sgpr0
879  // %sgpr1 = V_READFIRSTLANE_B32 %vgpr
880  // =>
881  // %sgpr1 = COPY %sgpr0
882  UseMI->setDesc(TII->get(AMDGPU::COPY));
883  UseMI->getOperand(1).setReg(OpToFold.getReg());
884  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
885  UseMI->getOperand(1).setIsKill(false);
886  UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
887  return;
888  }
889  }
890 
891  const MCInstrDesc &UseDesc = UseMI->getDesc();
892 
893  // Don't fold into target independent nodes. Target independent opcodes
894  // don't have defined register classes.
895  if (UseDesc.isVariadic() ||
896  UseOp.isImplicit() ||
897  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
898  return;
899  }
900 
901  if (!FoldingImmLike) {
902  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
903 
904  // FIXME: We could try to change the instruction from 64-bit to 32-bit
905  // to enable more folding opportunities. The shrink operands pass
906  // already does this.
907  return;
908  }
909 
910 
911  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
912  const TargetRegisterClass *FoldRC =
913  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
914 
915  // Split 64-bit constants into 32-bits for folding.
916  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
917  Register UseReg = UseOp.getReg();
918  const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
919 
920  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
921  return;
922 
923  APInt Imm(64, OpToFold.getImm());
924  if (UseOp.getSubReg() == AMDGPU::sub0) {
925  Imm = Imm.getLoBits(32);
926  } else {
927  assert(UseOp.getSubReg() == AMDGPU::sub1);
928  Imm = Imm.getHiBits(32);
929  }
930 
931  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
932  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
933  return;
934  }
935 
936 
937 
938  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
939 }
940 
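// Constant-fold the given 32-bit bitwise or shift opcode over two immediate
// operands. Returns false for opcodes this helper does not evaluate.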
941 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
942  uint32_t LHS, uint32_t RHS) {
943  switch (Opcode) {
944  case AMDGPU::V_AND_B32_e64:
945  case AMDGPU::V_AND_B32_e32:
946  case AMDGPU::S_AND_B32:
947  Result = LHS & RHS;
948  return true;
949  case AMDGPU::V_OR_B32_e64:
950  case AMDGPU::V_OR_B32_e32:
951  case AMDGPU::S_OR_B32:
952  Result = LHS | RHS;
953  return true;
954  case AMDGPU::V_XOR_B32_e64:
955  case AMDGPU::V_XOR_B32_e32:
956  case AMDGPU::S_XOR_B32:
957  Result = LHS ^ RHS;
958  return true;
959  case AMDGPU::S_XNOR_B32:
960  Result = ~(LHS ^ RHS);
961  return true;
962  case AMDGPU::S_NAND_B32:
963  Result = ~(LHS & RHS);
964  return true;
965  case AMDGPU::S_NOR_B32:
966  Result = ~(LHS | RHS);
967  return true;
968  case AMDGPU::S_ANDN2_B32:
969  Result = LHS & ~RHS;
970  return true;
971  case AMDGPU::S_ORN2_B32:
972  Result = LHS | ~RHS;
973  return true;
974  case AMDGPU::V_LSHL_B32_e64:
975  case AMDGPU::V_LSHL_B32_e32:
976  case AMDGPU::S_LSHL_B32:
977  // The instruction ignores the high bits for out of bounds shifts.
978  Result = LHS << (RHS & 31);
979  return true;
980  case AMDGPU::V_LSHLREV_B32_e64:
981  case AMDGPU::V_LSHLREV_B32_e32:
982  Result = RHS << (LHS & 31);
983  return true;
984  case AMDGPU::V_LSHR_B32_e64:
985  case AMDGPU::V_LSHR_B32_e32:
986  case AMDGPU::S_LSHR_B32:
987  Result = LHS >> (RHS & 31);
988  return true;
989  case AMDGPU::V_LSHRREV_B32_e64:
990  case AMDGPU::V_LSHRREV_B32_e32:
991  Result = RHS >> (LHS & 31);
992  return true;
993  case AMDGPU::V_ASHR_I32_e64:
994  case AMDGPU::V_ASHR_I32_e32:
995  case AMDGPU::S_ASHR_I32:
996  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
997  return true;
998  case AMDGPU::V_ASHRREV_I32_e64:
999  case AMDGPU::V_ASHRREV_I32_e32:
1000  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
1001  return true;
1002  default:
1003  return false;
1004  }
1005 }
1006 
1007 static unsigned getMovOpc(bool IsScalar) {
1008  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1009 }
1010 
1011 /// Remove any leftover implicit operands from mutating the instruction. e.g.
1012 /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
1013 /// anymore.
1014 static void stripExtraCopyOperands(MachineInstr &MI) {
1015  const MCInstrDesc &Desc = MI.getDesc();
1016  unsigned NumOps = Desc.getNumOperands() +
1017  Desc.getNumImplicitUses() +
1018  Desc.getNumImplicitDefs();
1019 
1020  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
1021  MI.RemoveOperand(I);
1022 }
1023 
1024 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
1025  MI.setDesc(NewDesc);
1026  stripExtraCopyOperands(MI);
1027 }
1028 
1029 static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
1030  MachineOperand &Op) {
1031  if (Op.isReg()) {
1032  // If this has a subregister, it obviously is a register source.
1033  if (Op.getSubReg() != AMDGPU::NoSubRegister || !Op.getReg().isVirtual())
1034  return &Op;
1035 
1036  MachineInstr *Def = MRI.getVRegDef(Op.getReg());
1037  if (Def && Def->isMoveImmediate()) {
1038  MachineOperand &ImmSrc = Def->getOperand(1);
1039  if (ImmSrc.isImm())
1040  return &ImmSrc;
1041  }
1042  }
1043 
1044  return &Op;
1045 }
1046 
1047 // Try to simplify operations with a constant that may appear after instruction
1048 // selection.
1049 // TODO: See if a frame index with a fixed offset can fold.
1050 static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII,
1051  MachineInstr *MI) {
1052  unsigned Opc = MI->getOpcode();
1053 
1054  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1055  if (Src0Idx == -1)
1056  return false;
1057  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
1058 
1059  if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
1060  Opc == AMDGPU::S_NOT_B32) &&
1061  Src0->isImm()) {
1062  MI->getOperand(1).ChangeToImmediate(~Src0->getImm());
1063  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
1064  return true;
1065  }
1066 
1067  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1068  if (Src1Idx == -1)
1069  return false;
1070  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
1071 
1072  if (!Src0->isImm() && !Src1->isImm())
1073  return false;
1074 
1075  // and k0, k1 -> v_mov_b32 (k0 & k1)
1076  // or k0, k1 -> v_mov_b32 (k0 | k1)
1077  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
1078  if (Src0->isImm() && Src1->isImm()) {
1079  int32_t NewImm;
1080  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
1081  return false;
1082 
1083  const SIRegisterInfo &TRI = TII->getRegisterInfo();
1084  bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
1085 
1086  // Be careful to change the right operand, src0 may belong to a different
1087  // instruction.
1088  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
1089  MI->RemoveOperand(Src1Idx);
1090  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
1091  return true;
1092  }
1093 
1094  if (!MI->isCommutable())
1095  return false;
1096 
1097  if (Src0->isImm() && !Src1->isImm()) {
1098  std::swap(Src0, Src1);
1099  std::swap(Src0Idx, Src1Idx);
1100  }
1101 
1102  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
1103  if (Opc == AMDGPU::V_OR_B32_e64 ||
1104  Opc == AMDGPU::V_OR_B32_e32 ||
1105  Opc == AMDGPU::S_OR_B32) {
1106  if (Src1Val == 0) {
1107  // y = or x, 0 => y = copy x
1108  MI->RemoveOperand(Src1Idx);
1109  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
1110  } else if (Src1Val == -1) {
1111  // y = or x, -1 => y = v_mov_b32 -1
1112  MI->RemoveOperand(Src1Idx);
1113  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
1114  } else
1115  return false;
1116 
1117  return true;
1118  }
1119 
1120  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
1121  MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
1122  MI->getOpcode() == AMDGPU::S_AND_B32) {
1123  if (Src1Val == 0) {
1124  // y = and x, 0 => y = v_mov_b32 0
1125  MI->RemoveOperand(Src0Idx);
1126  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
1127  } else if (Src1Val == -1) {
1128  // y = and x, -1 => y = copy x
1129  MI->RemoveOperand(Src1Idx);
1130  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
1131  stripExtraCopyOperands(*MI);
1132  } else
1133  return false;
1134 
1135  return true;
1136  }
1137 
1138  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
1139  MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
1140  MI->getOpcode() == AMDGPU::S_XOR_B32) {
1141  if (Src1Val == 0) {
1142  // y = xor x, 0 => y = copy x
1143  MI->RemoveOperand(Src1Idx);
1144  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
1145  return true;
1146  }
1147  }
1148 
1149  return false;
1150 }
1151 
1152 // Try to fold an instruction into a simpler one
1153 bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
1154  unsigned Opc = MI.getOpcode();
1155  if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
1156  Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
1157  return false;
1158 
1159  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1160  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1161  if (!Src1->isIdenticalTo(*Src0)) {
1162  auto *Src0Imm = getImmOrMaterializedImm(*MRI, *Src0);
1163  auto *Src1Imm = getImmOrMaterializedImm(*MRI, *Src1);
1164  if (!Src1Imm->isIdenticalTo(*Src0Imm))
1165  return false;
1166  }
1167 
1168  int Src1ModIdx =
1169  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
1170  int Src0ModIdx =
1171  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
1172  if ((Src1ModIdx != -1 && MI.getOperand(Src1ModIdx).getImm() != 0) ||
1173  (Src0ModIdx != -1 && MI.getOperand(Src0ModIdx).getImm() != 0))
1174  return false;
1175 
1176  LLVM_DEBUG(dbgs() << "Folded " << MI << " into ");
1177  auto &NewDesc =
1178  TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
1179  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
1180  if (Src2Idx != -1)
1181  MI.RemoveOperand(Src2Idx);
1182  MI.RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
1183  if (Src1ModIdx != -1)
1184  MI.RemoveOperand(Src1ModIdx);
1185  if (Src0ModIdx != -1)
1186  MI.RemoveOperand(Src0ModIdx);
1187  mutateCopyOp(MI, NewDesc);
1188  LLVM_DEBUG(dbgs() << MI);
1189  return true;
1190 }
1191 
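// Fold a v_and_b32 with 0xffff away when the instruction defining the other
// operand is already known to zero the high 16 bits of its result.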
1192 bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
1193  if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
1194  MI.getOpcode() != AMDGPU::V_AND_B32_e32)
1195  return false;
1196 
1197  MachineOperand *Src0 = getImmOrMaterializedImm(*MRI, MI.getOperand(1));
1198  if (!Src0->isImm() || Src0->getImm() != 0xffff)
1199  return false;
1200 
1201  Register Src1 = MI.getOperand(2).getReg();
1202  MachineInstr *SrcDef = MRI->getVRegDef(Src1);
1203  if (ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) {
1204  Register Dst = MI.getOperand(0).getReg();
1205  MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg());
1206  MI.eraseFromParent();
1207  return true;
1208  }
1209 
1210  return false;
1211 }
1212 
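// Fold OpToFold, the source operand of the foldable copy MI, into every use
// of MI's destination register, constant-folding users and committing the
// legal candidates.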
1213 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
1214  MachineOperand &OpToFold) const {
1215  // We need to mutate the operands of new mov instructions to add implicit
1216  // uses of EXEC, but adding them invalidates the use_iterator, so defer
1217  // this.
1218  SmallVector<MachineInstr *, 4> CopiesToReplace;
1219  SmallVector<FoldCandidate, 4> FoldList;
1220  MachineOperand &Dst = MI.getOperand(0);
1221 
1222  if (OpToFold.isImm()) {
1223  for (auto &UseMI :
1224  make_early_inc_range(MRI->use_nodbg_instructions(Dst.getReg()))) {
1225  // Folding the immediate may reveal operations that can be constant
1226  // folded or replaced with a copy. This can happen for example after
1227  // frame indices are lowered to constants or from splitting 64-bit
1228  // constants.
1229  //
1230  // We may also encounter cases where one or both operands are
1231  // immediates materialized into a register, which would ordinarily not
1232  // be folded due to multiple uses or operand constraints.
1233  if (tryConstantFoldOp(*MRI, TII, &UseMI))
1234  LLVM_DEBUG(dbgs() << "Constant folded " << UseMI);
1235  }
1236  }
1237 
1238  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1239  if (FoldingImm) {
1240  unsigned NumLiteralUses = 0;
1241  MachineOperand *NonInlineUse = nullptr;
1242  int NonInlineUseOpNo = -1;
1243 
1244  for (auto &Use :
1245  make_early_inc_range(MRI->use_nodbg_operands(Dst.getReg()))) {
1246  MachineInstr *UseMI = Use.getParent();
1247  unsigned OpNo = UseMI->getOperandNo(&Use);
1248 
1249  // Try to fold any inline immediate uses, and then only fold other
1250  // constants if they have one use.
1251  //
1252  // The legality of the inline immediate must be checked based on the use
1253  // operand, not the defining instruction, because 32-bit instructions
1254  // with 32-bit inline immediate sources may be used to materialize
1255  // constants used in 16-bit operands.
1256  //
1257  // e.g. it is unsafe to fold:
1258  // s_mov_b32 s0, 1.0 // materializes 0x3f800000
1259  // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
1260 
1261  // Folding immediates with more than one use will increase program size.
1262  // FIXME: This will also reduce register usage, which may be better
1263  // in some cases. A better heuristic is needed.
1264  if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
1265  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
1266  } else if (frameIndexMayFold(TII, *UseMI, OpNo, OpToFold)) {
1267  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
1268  } else {
1269  if (++NumLiteralUses == 1) {
1270  NonInlineUse = &Use;
1271  NonInlineUseOpNo = OpNo;
1272  }
1273  }
1274  }
1275 
1276  if (NumLiteralUses == 1) {
1277  MachineInstr *UseMI = NonInlineUse->getParent();
1278  foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
1279  }
1280  } else {
1281  // Folding register.
1282  SmallVector <MachineOperand *, 4> UsesToProcess;
1283  for (auto &Use : MRI->use_nodbg_operands(Dst.getReg()))
1284  UsesToProcess.push_back(&Use);
1285  for (auto U : UsesToProcess) {
1286  MachineInstr *UseMI = U->getParent();
1287 
1288  foldOperand(OpToFold, UseMI, UseMI->getOperandNo(U),
1289  FoldList, CopiesToReplace);
1290  }
1291  }
1292 
1293  MachineFunction *MF = MI.getParent()->getParent();
1294  // Make sure we add EXEC uses to any new v_mov instructions created.
1295  for (MachineInstr *Copy : CopiesToReplace)
1296  Copy->addImplicitDefUseOperands(*MF);
1297 
1298  for (FoldCandidate &Fold : FoldList) {
1299  assert(!Fold.isReg() || Fold.OpToFold);
1300  if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {
1301  Register Reg = Fold.OpToFold->getReg();
1302  MachineInstr *DefMI = Fold.OpToFold->getParent();
1303  if (DefMI->readsRegister(AMDGPU::EXEC, TRI) &&
1304  execMayBeModifiedBeforeUse(*MRI, Reg, *DefMI, *Fold.UseMI))
1305  continue;
1306  }
1307  if (updateOperand(Fold, *TII, *TRI, *ST)) {
1308  // Clear kill flags.
1309  if (Fold.isReg()) {
1310  assert(Fold.OpToFold && Fold.OpToFold->isReg());
1311  // FIXME: Probably shouldn't bother trying to fold if not an
1312  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
1313  // copies.
1314  MRI->clearKillFlags(Fold.OpToFold->getReg());
1315  }
1316  LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
1317  << static_cast<int>(Fold.UseOpNo) << " of "
1318  << *Fold.UseMI);
1319  } else if (Fold.isCommuted()) {
1320  // Restoring instruction's original operand order if fold has failed.
1321  TII->commuteInstruction(*Fold.UseMI, false);
1322  }
1323  }
1324 }
1325 
1326 // Clamp patterns are canonically selected to v_max_* instructions, so only
1327 // handle them.
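// A clamp of x reaches here as a V_MAX of x with itself (f16/f32/f64 or the
// packed form) with the clamp bit set and no other modifiers.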
1328 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
1329  unsigned Op = MI.getOpcode();
1330  switch (Op) {
1331  case AMDGPU::V_MAX_F32_e64:
1332  case AMDGPU::V_MAX_F16_e64:
1333  case AMDGPU::V_MAX_F64_e64:
1334  case AMDGPU::V_PK_MAX_F16: {
1335  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
1336  return nullptr;
1337 
1338  // Make sure sources are identical.
1339  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1340  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1341  if (!Src0->isReg() || !Src1->isReg() ||
1342  Src0->getReg() != Src1->getReg() ||
1343  Src0->getSubReg() != Src1->getSubReg() ||
1344  Src0->getSubReg() != AMDGPU::NoSubRegister)
1345  return nullptr;
1346 
1347  // Can't fold up if we have modifiers.
1348  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1349  return nullptr;
1350 
1351  unsigned Src0Mods
1352  = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
1353  unsigned Src1Mods
1354  = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
1355 
1356  // Having a 0 op_sel_hi would require swizzling the output in the source
1357  // instruction, which we can't do.
1358  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1
1359  : 0u;
1360  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
1361  return nullptr;
1362  return Src0;
1363  }
1364  default:
1365  return nullptr;
1366  }
1367 }
1368 
1369 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
1370 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
1371  const MachineOperand *ClampSrc = isClamp(MI);
1372  if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
1373  return false;
1374 
1375  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
1376 
1377  // The type of clamp must be compatible.
1378  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
1379  return false;
1380 
1381  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
1382  if (!DefClamp)
1383  return false;
1384 
1385  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def);
1386 
1387  // Clamp is applied after omod, so it is OK if omod is set.
1388  DefClamp->setImm(1);
1389  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1390  MI.eraseFromParent();
1391 
1392  // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
1393  // instruction, so we might as well convert it to the more flexible VOP3-only
1394  // mad/fma form.
1395  MachineFunction::iterator MBBI = Def->getParent()->getIterator();
1396  if (MachineInstr *NewMI = TII->convertToThreeAddress(MBBI, *Def, nullptr))
1397  Def->eraseFromParent();
1398 
1399  return true;
1400 }
1401 
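// Map the constant multiplier (0.5, 2.0 or 4.0, encoded in the format of the
// given V_MUL opcode) to the matching output-modifier value, or
// SIOutMods::NONE.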
1402 static int getOModValue(unsigned Opc, int64_t Val) {
1403  switch (Opc) {
1404  case AMDGPU::V_MUL_F64_e64: {
1405  switch (Val) {
1406  case 0x3fe0000000000000: // 0.5
1407  return SIOutMods::DIV2;
1408  case 0x4000000000000000: // 2.0
1409  return SIOutMods::MUL2;
1410  case 0x4010000000000000: // 4.0
1411  return SIOutMods::MUL4;
1412  default:
1413  return SIOutMods::NONE;
1414  }
1415  }
1416  case AMDGPU::V_MUL_F32_e64: {
1417  switch (static_cast<uint32_t>(Val)) {
1418  case 0x3f000000: // 0.5
1419  return SIOutMods::DIV2;
1420  case 0x40000000: // 2.0
1421  return SIOutMods::MUL2;
1422  case 0x40800000: // 4.0
1423  return SIOutMods::MUL4;
1424  default:
1425  return SIOutMods::NONE;
1426  }
1427  }
1428  case AMDGPU::V_MUL_F16_e64: {
1429  switch (static_cast<uint16_t>(Val)) {
1430  case 0x3800: // 0.5
1431  return SIOutMods::DIV2;
1432  case 0x4000: // 2.0
1433  return SIOutMods::MUL2;
1434  case 0x4400: // 4.0
1435  return SIOutMods::MUL4;
1436  default:
1437  return SIOutMods::NONE;
1438  }
1439  }
1440  default:
1441  llvm_unreachable("invalid mul opcode");
1442  }
1443 }
1444 
1445 // FIXME: Does this really not support denormals with f16?
1446 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
1447 // handled, so will anything other than that break?
1448 std::pair<const MachineOperand *, int>
1449 SIFoldOperands::isOMod(const MachineInstr &MI) const {
1450  unsigned Op = MI.getOpcode();
1451  switch (Op) {
1452  case AMDGPU::V_MUL_F64_e64:
1453  case AMDGPU::V_MUL_F32_e64:
1454  case AMDGPU::V_MUL_F16_e64: {
1455  // If output denormals are enabled, omod is ignored.
1456  if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32OutputDenormals) ||
1457  ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64) &&
1458  MFI->getMode().FP64FP16OutputDenormals))
1459  return std::make_pair(nullptr, SIOutMods::NONE);
1460 
1461  const MachineOperand *RegOp = nullptr;
1462  const MachineOperand *ImmOp = nullptr;
1463  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1464  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1465  if (Src0->isImm()) {
1466  ImmOp = Src0;
1467  RegOp = Src1;
1468  } else if (Src1->isImm()) {
1469  ImmOp = Src1;
1470  RegOp = Src0;
1471  } else
1472  return std::make_pair(nullptr, SIOutMods::NONE);
1473 
1474  int OMod = getOModValue(Op, ImmOp->getImm());
1475  if (OMod == SIOutMods::NONE ||
1476  TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
1477  TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
1478  TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
1479  TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
1480  return std::make_pair(nullptr, SIOutMods::NONE);
1481 
1482  return std::make_pair(RegOp, OMod);
1483  }
1484  case AMDGPU::V_ADD_F64_e64:
1485  case AMDGPU::V_ADD_F32_e64:
1486  case AMDGPU::V_ADD_F16_e64: {
1487  // If output denormals are enabled, omod is ignored.
1488  if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32OutputDenormals) ||
1489  ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64) &&
1490  MFI->getMode().FP64FP16OutputDenormals))
1491  return std::make_pair(nullptr, SIOutMods::NONE);
1492 
1493  // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
1494  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1495  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1496 
1497  if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
1498  Src0->getSubReg() == Src1->getSubReg() &&
1499  !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
1500  !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
1501  !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
1502  !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1503  return std::make_pair(Src0, SIOutMods::MUL2);
1504 
1505  return std::make_pair(nullptr, SIOutMods::NONE);
1506  }
1507  default:
1508  return std::make_pair(nullptr, SIOutMods::NONE);
1509  }
1510 }
1511 
1512 // FIXME: Does this need to check IEEE bit on function?
1513 bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1514  const MachineOperand *RegOp;
1515  int OMod;
1516  std::tie(RegOp, OMod) = isOMod(MI);
1517  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
1518  RegOp->getSubReg() != AMDGPU::NoSubRegister ||
1519  !MRI->hasOneNonDBGUser(RegOp->getReg()))
1520  return false;
1521 
1522  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
1523  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
1524  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
1525  return false;
1526 
1527  // Clamp is applied after omod. If the source already has clamp set, don't
1528  // fold it.
1529  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
1530  return false;
1531 
1532  LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def);
1533 
1534  DefOMod->setImm(OMod);
1535  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1536  MI.eraseFromParent();
1537 
1538  // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
1539  // instruction, so we might as well convert it to the more flexible VOP3-only
1540  // mad/fma form.
1541  MachineFunction::iterator MBBI = Def->getParent()->getIterator();
1542  if (MachineInstr *NewMI = TII->convertToThreeAddress(MBBI, *Def, nullptr))
1543  Def->eraseFromParent();
1544 
1545  return true;
1546 }
1547 
1548 // Try to fold a reg_sequence with vgpr output and agpr inputs into an
1549 // instruction which can take an agpr. So far that means a store.
1550 bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
1551  assert(MI.isRegSequence());
1552  auto Reg = MI.getOperand(0).getReg();
1553 
1554  if (!ST->hasGFX90AInsts() || !TRI->isVGPR(*MRI, Reg) ||
1555  !MRI->hasOneNonDBGUse(Reg))
1556  return false;
1557 
1558  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
1559  if (!getRegSeqInit(Defs, Reg, MCOI::OPERAND_REGISTER, TII, *MRI))
1560  return false;
1561 
1562  for (auto &Def : Defs) {
1563  const auto *Op = Def.first;
1564  if (!Op->isReg())
1565  return false;
1566  if (TRI->isAGPR(*MRI, Op->getReg()))
1567  continue;
1568  // Maybe this is a COPY from AREG
1569  const MachineInstr *SubDef = MRI->getVRegDef(Op->getReg());
1570  if (!SubDef || !SubDef->isCopy() || SubDef->getOperand(1).getSubReg())
1571  return false;
1572  if (!TRI->isAGPR(*MRI, SubDef->getOperand(1).getReg()))
1573  return false;
1574  }
1575 
1576  MachineOperand *Op = &*MRI->use_nodbg_begin(Reg);
1577  MachineInstr *UseMI = Op->getParent();
1578  while (UseMI->isCopy() && !Op->getSubReg()) {
1579  Reg = UseMI->getOperand(0).getReg();
1580  if (!TRI->isVGPR(*MRI, Reg) || !MRI->hasOneNonDBGUse(Reg))
1581  return false;
1582  Op = &*MRI->use_nodbg_begin(Reg);
1583  UseMI = Op->getParent();
1584  }
1585 
1586  if (Op->getSubReg())
1587  return false;
1588 
1589  unsigned OpIdx = Op - &UseMI->getOperand(0);
1590  const MCInstrDesc &InstDesc = UseMI->getDesc();
1591  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
1592  switch (OpInfo.RegClass) {
1593  case AMDGPU::AV_32RegClassID: LLVM_FALLTHROUGH;
1594  case AMDGPU::AV_64RegClassID: LLVM_FALLTHROUGH;
1595  case AMDGPU::AV_96RegClassID: LLVM_FALLTHROUGH;
1596  case AMDGPU::AV_128RegClassID: LLVM_FALLTHROUGH;
1597  case AMDGPU::AV_160RegClassID:
1598  break;
1599  default:
1600  return false;
1601  }
1602 
1603  const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
1604  auto Dst = MRI->createVirtualRegister(NewDstRC);
1605  auto RS = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
1606  TII->get(AMDGPU::REG_SEQUENCE), Dst);
1607 
1608  for (unsigned I = 0; I < Defs.size(); ++I) {
1609  MachineOperand *Def = Defs[I].first;
1610  Def->setIsKill(false);
1611  if (TRI->isAGPR(*MRI, Def->getReg())) {
1612  RS.add(*Def);
1613  } else { // This is a copy
1614  MachineInstr *SubDef = MRI->getVRegDef(Def->getReg());
1615  SubDef->getOperand(1).setIsKill(false);
1616  RS.addReg(SubDef->getOperand(1).getReg(), 0, Def->getSubReg());
1617  }
1618  RS.addImm(Defs[I].second);
1619  }
1620 
1621  Op->setReg(Dst);
1622  if (!TII->isOperandLegal(*UseMI, OpIdx, Op)) {
1623  Op->setReg(Reg);
1624  RS->eraseFromParent();
1625  return false;
1626  }
1627 
1628  LLVM_DEBUG(dbgs() << "Folded " << *RS << " into " << *UseMI);
1629 
1630  // Erase the REG_SEQUENCE eagerly, unless we followed a chain of COPY users,
1631  // in which case we can erase them all later in runOnMachineFunction.
1632  if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
1633  MI.eraseFromParentAndMarkDBGValuesForRemoval();
1634  return true;
1635 }
1636 
1637 // Try to hoist an AGPR to VGPR copy out of the loop across a LCSSA PHI.
1638 // This should allow folding of an AGPR into a consumer which may support it.
1639 // I.e.:
1640 //
1641 // loop:                             // loop:
1642 //   %1:vreg = COPY %0:areg          // exit:
1643 // exit:                          => //   %1:areg = PHI %0:areg, %loop
1644 //   %2:vreg = PHI %1:vreg, %loop    //   %2:vreg = COPY %1:areg
1645 bool SIFoldOperands::tryFoldLCSSAPhi(MachineInstr &PHI) {
1646  assert(PHI.isPHI());
1647 
1648  if (PHI.getNumExplicitOperands() != 3) // Single input LCSSA PHI
1649  return false;
1650 
1651  Register PhiIn = PHI.getOperand(1).getReg();
1652  Register PhiOut = PHI.getOperand(0).getReg();
1653  if (PHI.getOperand(1).getSubReg() ||
1654  !TRI->isVGPR(*MRI, PhiIn) || !TRI->isVGPR(*MRI, PhiOut))
1655  return false;
1656 
1657  // A single use should not matter for correctness, but if it has another use
1658  // inside the loop we may perform the copy twice in the worst case.
1659  if (!MRI->hasOneNonDBGUse(PhiIn))
1660  return false;
1661 
1662  MachineInstr *Copy = MRI->getVRegDef(PhiIn);
1663  if (!Copy || !Copy->isCopy())
1664  return false;
1665 
1666  Register CopyIn = Copy->getOperand(1).getReg();
1667  if (!TRI->isAGPR(*MRI, CopyIn) || Copy->getOperand(1).getSubReg())
1668  return false;
1669 
1670  const TargetRegisterClass *ARC = MRI->getRegClass(CopyIn);
1671  Register NewReg = MRI->createVirtualRegister(ARC);
1672  PHI.getOperand(1).setReg(CopyIn);
1673  PHI.getOperand(0).setReg(NewReg);
1674 
1675  MachineBasicBlock *MBB = PHI.getParent();
1676  BuildMI(*MBB, MBB->getFirstNonPHI(), Copy->getDebugLoc(),
1677  TII->get(AMDGPU::COPY), PhiOut)
1678  .addReg(NewReg, RegState::Kill);
1679  Copy->eraseFromParent(); // We know this copy had a single use.
1680 
1681  LLVM_DEBUG(dbgs() << "Folded " << PHI);
1682 
1683  return true;
1684 }
1685 
1686 // Attempt to convert VGPR load to an AGPR load.
1687 bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
1688  assert(MI.mayLoad());
1689  if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)
1690  return false;
1691 
1692  MachineOperand &Def = MI.getOperand(0);
1693  if (!Def.isDef())
1694  return false;
1695 
1696  Register DefReg = Def.getReg();
1697 
1698  if (DefReg.isPhysical() || !TRI->isVGPR(*MRI, DefReg))
1699  return false;
1700 
1701  SmallVector<const MachineInstr *, 8> Users;
1702  SmallVector<Register, 8> MoveRegs;
1703  for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg)) {
1704  Users.push_back(&I);
1705  }
1706  if (Users.empty())
1707  return false;
1708 
1709  // Check that every use is a copy to an agpr or a reg_sequence producing an agpr.
1710  while (!Users.empty()) {
1711  const MachineInstr *I = Users.pop_back_val();
1712  if (!I->isCopy() && !I->isRegSequence())
1713  return false;
1714  Register DstReg = I->getOperand(0).getReg();
1715  if (TRI->isAGPR(*MRI, DstReg))
1716  continue;
1717  MoveRegs.push_back(DstReg);
1718  for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg)) {
1719  Users.push_back(&U);
1720  }
1721  }
1722 
1723  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
1724  MRI->setRegClass(DefReg, TRI->getEquivalentAGPRClass(RC));
1725  if (!TII->isOperandLegal(MI, 0, &Def)) {
1726  MRI->setRegClass(DefReg, RC);
1727  return false;
1728  }
1729 
1730  while (!MoveRegs.empty()) {
1731  Register Reg = MoveRegs.pop_back_val();
1732  MRI->setRegClass(Reg, TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)));
1733  }
1734 
1735  LLVM_DEBUG(dbgs() << "Folded " << MI);
1736 
1737  return true;
1738 }
1739 
1740 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
1741  if (skipFunction(MF.getFunction()))
1742  return false;
1743 
1744  MRI = &MF.getRegInfo();
1745  ST = &MF.getSubtarget<GCNSubtarget>();
1746  TII = ST->getInstrInfo();
1747  TRI = &TII->getRegisterInfo();
1748  MFI = MF.getInfo<SIMachineFunctionInfo>();
1749 
1750  // omod is ignored by the hardware if the IEEE bit is enabled. omod also does not
1751  // correctly handle signed zeros.
1752  //
1753  // FIXME: Also need to check strictfp
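 // Illustrative sketch (not from the source; operand lists elided, registers
 // made up) of the kind of rewrite tryFoldOMod performs and why it is gated:
 //
 //   %1:vgpr_32 = V_ADD_F32_e64 ..., %a, ..., %b, ...
 //   %2:vgpr_32 = V_MUL_F32_e64 ..., 2.0, ..., %1, ...
 //
 // can collapse into a single V_ADD_F32 with its omod field set to MUL2,
 // which is only safe when the IEEE bit is off and signed zeros may be
 // ignored.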
1754  bool IsIEEEMode = MFI->getMode().IEEE;
1755  bool HasNSZ = MFI->hasNoSignedZerosFPMath();
1756 
1757  for (MachineBasicBlock *MBB : depth_first(&MF)) {
1758  MachineOperand *CurrentKnownM0Val = nullptr;
1759  for (auto &MI : make_early_inc_range(*MBB)) {
1760  tryFoldCndMask(MI);
1761 
1762  if (tryFoldZeroHighBits(MI))
1763  continue;
1764 
1765  if (MI.isRegSequence() && tryFoldRegSequence(MI))
1766  continue;
1767 
1768  if (MI.isPHI() && tryFoldLCSSAPhi(MI))
1769  continue;
1770 
1771  if (MI.mayLoad() && tryFoldLoad(MI))
1772  continue;
1773 
1774  if (!TII->isFoldableCopy(MI)) {
1775  // Saw an unknown clobber of m0, so we no longer know what it is.
1776  if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
1777  CurrentKnownM0Val = nullptr;
1778 
1779  // TODO: Omod might be OK if there is NSZ only on the source
1780  // instruction, and not the omod multiply.
1781  if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1782  !tryFoldOMod(MI))
1783  tryFoldClamp(MI);
1784 
1785  continue;
1786  }
1787 
1788  // Specially track simple redefs of m0 to the same value in a block, so we
1789  // can erase the later ones.
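 // Illustrative sketch (not from the source; %0 is made up):
 //
 //   $m0 = S_MOV_B32 %0
 //   ...                      ; nothing clobbers m0 in between
 //   $m0 = S_MOV_B32 %0       ; identical redef, erased below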
1790  if (MI.getOperand(0).getReg() == AMDGPU::M0) {
1791  MachineOperand &NewM0Val = MI.getOperand(1);
1792  if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
1793  MI.eraseFromParent();
1794  continue;
1795  }
1796 
1797  // We aren't tracking other physical registers
1798  CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ?
1799  nullptr : &NewM0Val;
1800  continue;
1801  }
1802 
1803  MachineOperand &OpToFold = MI.getOperand(1);
1804  bool FoldingImm =
1805  OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1806 
1807  // FIXME: We could also be folding things like TargetIndexes.
1808  if (!FoldingImm && !OpToFold.isReg())
1809  continue;
1810 
1811  if (OpToFold.isReg() && !OpToFold.getReg().isVirtual())
1812  continue;
1813 
1814  // Prevent folding operands backwards in the function. For example,
1815  // the COPY opcode must not be replaced by 1 here:
1816  //
1817  // %3 = COPY %vgpr0; VGPR_32:%3
1818  // ...
1819  // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1820  if (!MI.getOperand(0).getReg().isVirtual())
1821  continue;
1822 
1823  foldInstOperand(MI, OpToFold);
1824 
1825  // If we managed to fold all uses of this copy then we might as well
1826  // delete it now.
1827  // The only reason we need to follow chains of copies here is that
1828  // tryFoldRegSequence looks forward through copies before folding a
1829  // REG_SEQUENCE into its eventual users.
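 // Illustrative sketch (not from the source; registers are made up): if all
 // uses of %2 were folded away, the walk below erases %2's copy, then %1's
 // copy, and finally the now-dead REG_SEQUENCE:
 //
 //   %0 = REG_SEQUENCE ...
 //   %1 = COPY %0
 //   %2 = COPY %1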
1830  auto *InstToErase = &MI;
1831  while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
1832  auto &SrcOp = InstToErase->getOperand(1);
1833  auto SrcReg = SrcOp.isReg() ? SrcOp.getReg() : Register();
1834  InstToErase->eraseFromParentAndMarkDBGValuesForRemoval();
1835  InstToErase = nullptr;
1836  if (!SrcReg || SrcReg.isPhysical())
1837  break;
1838  InstToErase = MRI->getVRegDef(SrcReg);
1839  if (!InstToErase || !TII->isFoldableCopy(*InstToErase))
1840  break;
1841  }
1842  if (InstToErase && InstToErase->isRegSequence() &&
1843  MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg()))
1844  InstToErase->eraseFromParentAndMarkDBGValuesForRemoval();
1845  }
1846  }
1847  return true;
1848 }