1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the Base ARM implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMBaseInstrInfo.h"
14 #include "ARMBaseRegisterInfo.h"
15 #include "ARMConstantPoolValue.h"
16 #include "ARMFeatures.h"
17 #include "ARMHazardRecognizer.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
22 #include "MVETailPredUtils.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Triple.h"
46 #include "llvm/IR/Attributes.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DebugLoc.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/MC/MCAsmInfo.h"
52 #include "llvm/MC/MCInstrDesc.h"
55 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/Compiler.h"
58 #include "llvm/Support/Debug.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <iterator>
66 #include <new>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "arm-instrinfo"
73 
74 #define GET_INSTRINFO_CTOR_DTOR
75 #include "ARMGenInstrInfo.inc"
76 
77 static cl::opt<bool>
78 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
79  cl::desc("Enable ARM 2-addr to 3-addr conv"));
80 
81 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
82 struct ARM_MLxEntry {
83  uint16_t MLxOpc; // MLA / MLS opcode
84  uint16_t MulOpc; // Expanded multiplication opcode
85  uint16_t AddSubOpc; // Expanded add / sub opcode
86  bool NegAcc; // True if the acc is negated before the add / sub.
87  bool HasLane; // True if instruction has an extra "lane" operand.
88 };
89 
90 static const ARM_MLxEntry ARM_MLxTable[] = {
91  // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
92  // fp scalar ops
93  { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
94  { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
95  { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
96  { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
97  { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
98  { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
99  { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
100  { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
101 
102  // fp SIMD ops
103  { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
104  { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
105  { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
106  { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
107  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
108  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
109  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
110  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
111 };
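// Each row of the table above describes how one fused multiply-accumulate can
// be decomposed when issuing the MLx form would cause a hazard: for example,
// the VMLAS row says a scalar VMLA may be expanded into a VMULS followed by a
// VADDS. Rows with NegAcc use a subtract so the negated accumulator is handled
// correctly, and rows with HasLane carry the extra "lane" operand of the
// by-scalar SIMD forms.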
112 
113 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
114     : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
115  Subtarget(STI) {
116  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
117  if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
118  llvm_unreachable("Duplicated entries?");
119  MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
120  MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
121  }
122 }
123 
124 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
125 // currently defaults to no prepass hazard recognizer.
126 ScheduleHazardRecognizer *ARMBaseInstrInfo::
127 CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
128                              const ScheduleDAG *DAG) const {
129  if (usePreRAHazardRecognizer()) {
130  const InstrItineraryData *II =
131  static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
132  return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
133  }
134   return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
135 }
136 
137 // Called during:
138 // - pre-RA scheduling
139 // - post-RA scheduling when FeatureUseMISched is set
140 ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
141     const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
142   MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
143 
144  // We would like to restrict this hazard recognizer to only
145  // post-RA scheduling; we can tell that we're post-RA because we don't
146  // track VRegLiveness.
147  // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
148  // banks banked on bit 2. Assume that TCMs are in use.
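  // The 0x4 passed below is the address-bit mask used to pick a DTCM bank:
  // two accesses whose addresses differ in bit 2 are assumed to hit different
  // banks and therefore not to conflict.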
149  if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
150  MHR->AddHazardRecognizer(
151  std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
152 
153  // Not inserting ARMHazardRecognizerFPMLx because that would change
154  // legacy behavior
155 
156   auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
157   MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
158  return MHR;
159 }
160 
161 // Called during post-RA scheduling when FeatureUseMISched is not set
162 ScheduleHazardRecognizer *ARMBaseInstrInfo::
163 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
164                                    const ScheduleDAG *DAG) const {
165   MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
166 
167  if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
168  MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
169 
170   auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
171   if (BHR)
172  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
173  return MHR;
174 }
175 
176 MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
177     MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
178   // FIXME: Thumb2 support.
179 
180  if (!EnableARM3Addr)
181  return nullptr;
182 
183  MachineFunction &MF = *MI.getParent()->getParent();
184  uint64_t TSFlags = MI.getDesc().TSFlags;
185  bool isPre = false;
186  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
187  default: return nullptr;
188  case ARMII::IndexModePre:
189  isPre = true;
190  break;
191   case ARMII::IndexModePost:
192     break;
193  }
194 
195  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
196  // operation.
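  // For example, a pre-indexed "ldr r0, [r1, #8]!" becomes an "add r1, r1, #8"
  // followed by an unindexed "ldr r0, [r1]"; the post-indexed form performs
  // the unindexed access first and the base update afterwards.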
197  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
198  if (MemOpc == 0)
199  return nullptr;
200 
201  MachineInstr *UpdateMI = nullptr;
202  MachineInstr *MemMI = nullptr;
203  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
204  const MCInstrDesc &MCID = MI.getDesc();
205  unsigned NumOps = MCID.getNumOperands();
206  bool isLoad = !MI.mayStore();
207  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
208  const MachineOperand &Base = MI.getOperand(2);
209  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
210  Register WBReg = WB.getReg();
211  Register BaseReg = Base.getReg();
212  Register OffReg = Offset.getReg();
213  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
214  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
215  switch (AddrMode) {
216  default: llvm_unreachable("Unknown indexed op!");
217  case ARMII::AddrMode2: {
218  bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
219  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
220  if (OffReg == 0) {
221  if (ARM_AM::getSOImmVal(Amt) == -1)
222  // Can't encode it in a so_imm operand. This transformation will
223  // add more than 1 instruction. Abandon!
224  return nullptr;
225  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
226  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
227  .addReg(BaseReg)
228  .addImm(Amt)
229  .add(predOps(Pred))
230  .add(condCodeOp());
231  } else if (Amt != 0) {
232     ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
233     unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
234  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
235  get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
236  .addReg(BaseReg)
237  .addReg(OffReg)
238  .addReg(0)
239  .addImm(SOOpc)
240  .add(predOps(Pred))
241  .add(condCodeOp());
242  } else
243  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
244  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
245  .addReg(BaseReg)
246  .addReg(OffReg)
247  .add(predOps(Pred))
248  .add(condCodeOp());
249  break;
250  }
251  case ARMII::AddrMode3 : {
252  bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
253  unsigned Amt = ARM_AM::getAM3Offset(OffImm);
254  if (OffReg == 0)
255  // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
256  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
257  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
258  .addReg(BaseReg)
259  .addImm(Amt)
260  .add(predOps(Pred))
261  .add(condCodeOp());
262  else
263  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
264  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
265  .addReg(BaseReg)
266  .addReg(OffReg)
267  .add(predOps(Pred))
268  .add(condCodeOp());
269  break;
270  }
271  }
272 
273  std::vector<MachineInstr*> NewMIs;
274  if (isPre) {
275  if (isLoad)
276  MemMI =
277  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
278  .addReg(WBReg)
279  .addImm(0)
280  .addImm(Pred);
281  else
282  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
283  .addReg(MI.getOperand(1).getReg())
284  .addReg(WBReg)
285  .addReg(0)
286  .addImm(0)
287  .addImm(Pred);
288  NewMIs.push_back(MemMI);
289  NewMIs.push_back(UpdateMI);
290  } else {
291  if (isLoad)
292  MemMI =
293  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
294  .addReg(BaseReg)
295  .addImm(0)
296  .addImm(Pred);
297  else
298  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
299  .addReg(MI.getOperand(1).getReg())
300  .addReg(BaseReg)
301  .addReg(0)
302  .addImm(0)
303  .addImm(Pred);
304  if (WB.isDead())
305  UpdateMI->getOperand(0).setIsDead();
306  NewMIs.push_back(UpdateMI);
307  NewMIs.push_back(MemMI);
308  }
309 
310  // Transfer LiveVariables states, kill / dead info.
311  if (LV) {
312  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
313  MachineOperand &MO = MI.getOperand(i);
314  if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
315  Register Reg = MO.getReg();
316 
317         LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
318         if (MO.isDef()) {
319  MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
320  if (MO.isDead())
321  LV->addVirtualRegisterDead(Reg, *NewMI);
322  }
323  if (MO.isUse() && MO.isKill()) {
324  for (unsigned j = 0; j < 2; ++j) {
325  // Look at the two new MI's in reverse order.
326  MachineInstr *NewMI = NewMIs[j];
327  if (!NewMI->readsRegister(Reg))
328  continue;
329  LV->addVirtualRegisterKilled(Reg, *NewMI);
330  if (VI.removeKill(MI))
331  VI.Kills.push_back(NewMI);
332  break;
333  }
334  }
335  }
336  }
337  }
338 
339  MachineBasicBlock::iterator MBBI = MI.getIterator();
340  MFI->insert(MBBI, NewMIs[1]);
341  MFI->insert(MBBI, NewMIs[0]);
342  return NewMIs[0];
343 }
344 
345 // Branch analysis.
346 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
347                                      MachineBasicBlock *&TBB,
348                                      MachineBasicBlock *&FBB,
349                                      SmallVectorImpl<MachineOperand> &Cond,
350                                      bool AllowModify) const {
351  TBB = nullptr;
352  FBB = nullptr;
353 
354   MachineBasicBlock::instr_iterator I = MBB.instr_end();
355   if (I == MBB.instr_begin())
356  return false; // Empty blocks are easy.
357  --I;
358 
359  // Walk backwards from the end of the basic block until the branch is
360  // analyzed or we give up.
361  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
362  // Flag to be raised on unanalyzeable instructions. This is useful in cases
363  // where we want to clean up on the end of the basic block before we bail
364  // out.
365  bool CantAnalyze = false;
366 
367  // Skip over DEBUG values, predicated nonterminators and speculation
368  // barrier terminators.
369  while (I->isDebugInstr() || !I->isTerminator() ||
370  isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
371  I->getOpcode() == ARM::t2DoLoopStartTP){
372  if (I == MBB.instr_begin())
373  return false;
374  --I;
375  }
376 
377  if (isIndirectBranchOpcode(I->getOpcode()) ||
378  isJumpTableBranchOpcode(I->getOpcode())) {
379  // Indirect branches and jump tables can't be analyzed, but we still want
380  // to clean up any instructions at the tail of the basic block.
381  CantAnalyze = true;
382  } else if (isUncondBranchOpcode(I->getOpcode())) {
383  TBB = I->getOperand(0).getMBB();
384  } else if (isCondBranchOpcode(I->getOpcode())) {
385  // Bail out if we encounter multiple conditional branches.
386  if (!Cond.empty())
387  return true;
388 
389  assert(!FBB && "FBB should have been null.");
390  FBB = TBB;
391  TBB = I->getOperand(0).getMBB();
392  Cond.push_back(I->getOperand(1));
393  Cond.push_back(I->getOperand(2));
394  } else if (I->isReturn()) {
395  // Returns can't be analyzed, but we should run cleanup.
396  CantAnalyze = true;
397  } else {
398  // We encountered other unrecognized terminator. Bail out immediately.
399  return true;
400  }
401 
402  // Cleanup code - to be run for unpredicated unconditional branches and
403  // returns.
404  if (!isPredicated(*I) &&
405  (isUncondBranchOpcode(I->getOpcode()) ||
406  isIndirectBranchOpcode(I->getOpcode()) ||
407  isJumpTableBranchOpcode(I->getOpcode()) ||
408  I->isReturn())) {
409  // Forget any previous condition branch information - it no longer applies.
410  Cond.clear();
411  FBB = nullptr;
412 
413  // If we can modify the function, delete everything below this
414  // unconditional branch.
415  if (AllowModify) {
416  MachineBasicBlock::iterator DI = std::next(I);
417  while (DI != MBB.instr_end()) {
418  MachineInstr &InstToDelete = *DI;
419  ++DI;
420  // Speculation barriers must not be deleted.
421  if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
422  continue;
423  InstToDelete.eraseFromParent();
424  }
425  }
426  }
427 
428  if (CantAnalyze) {
429  // We may not be able to analyze the block, but we could still have
430  // an unconditional branch as the last instruction in the block, which
431  // just branches to layout successor. If this is the case, then just
432  // remove it if we're allowed to make modifications.
433  if (AllowModify && !isPredicated(MBB.back()) &&
434         isUncondBranchOpcode(MBB.back().getOpcode()) &&
435         TBB && MBB.isLayoutSuccessor(TBB))
436  removeBranch(MBB);
437  return true;
438  }
439 
440  if (I == MBB.instr_begin())
441  return false;
442 
443  --I;
444  }
445 
446  // We made it past the terminators without bailing out - we must have
447  // analyzed this branch successfully.
448  return false;
449 }
450 
451 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
452                                         int *BytesRemoved) const {
453  assert(!BytesRemoved && "code size not handled");
454 
455   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
456   if (I == MBB.end())
457  return 0;
458 
459  if (!isUncondBranchOpcode(I->getOpcode()) &&
460  !isCondBranchOpcode(I->getOpcode()))
461  return 0;
462 
463  // Remove the branch.
464  I->eraseFromParent();
465 
466  I = MBB.end();
467 
468  if (I == MBB.begin()) return 1;
469  --I;
470  if (!isCondBranchOpcode(I->getOpcode()))
471  return 1;
472 
473  // Remove the branch.
474  I->eraseFromParent();
475  return 2;
476 }
477 
478 unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
479                                         MachineBasicBlock *TBB,
480                                         MachineBasicBlock *FBB,
481                                         ArrayRef<MachineOperand> Cond,
482                                         const DebugLoc &DL,
483  int *BytesAdded) const {
484  assert(!BytesAdded && "code size not handled");
485   ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
486   int BOpc   = !AFI->isThumbFunction()
487  ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
488  int BccOpc = !AFI->isThumbFunction()
489  ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
490  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
491 
492  // Shouldn't be a fall through.
493  assert(TBB && "insertBranch must not be told to insert a fallthrough");
494  assert((Cond.size() == 2 || Cond.size() == 0) &&
495  "ARM branch conditions have two components!");
496 
497  // For conditional branches, we use addOperand to preserve CPSR flags.
498 
499  if (!FBB) {
500  if (Cond.empty()) { // Unconditional branch?
501  if (isThumb)
502  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
503  else
504  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
505  } else
506  BuildMI(&MBB, DL, get(BccOpc))
507  .addMBB(TBB)
508  .addImm(Cond[0].getImm())
509  .add(Cond[1]);
510  return 1;
511  }
512 
513  // Two-way conditional branch.
514  BuildMI(&MBB, DL, get(BccOpc))
515  .addMBB(TBB)
516  .addImm(Cond[0].getImm())
517  .add(Cond[1]);
518  if (isThumb)
519  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
520  else
521  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
522  return 2;
523 }
524 
525 bool ARMBaseInstrInfo::
526 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
527   ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
528  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
529  return false;
530 }
531 
532 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
533   if (MI.isBundle()) {
534     MachineBasicBlock::const_instr_iterator I = MI.getIterator();
535     MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
536  while (++I != E && I->isInsideBundle()) {
537  int PIdx = I->findFirstPredOperandIdx();
538  if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
539  return true;
540  }
541  return false;
542  }
543 
544  int PIdx = MI.findFirstPredOperandIdx();
545  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
546 }
547 
548 std::string ARMBaseInstrInfo::createMIROperandComment(
549     const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
550  const TargetRegisterInfo *TRI) const {
551 
552  // First, let's see if there is a generic comment for this operand
553  std::string GenericComment =
554       TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
555   if (!GenericComment.empty())
556  return GenericComment;
557 
558  // If not, check if we have an immediate operand.
559  if (Op.getType() != MachineOperand::MO_Immediate)
560  return std::string();
561 
562  // And print its corresponding condition code if the immediate is a
563  // predicate.
564  int FirstPredOp = MI.findFirstPredOperandIdx();
565  if (FirstPredOp != (int) OpIdx)
566  return std::string();
567 
568  std::string CC = "CC::";
569  CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
570  return CC;
571 }
572 
573 bool ARMBaseInstrInfo::PredicateInstruction(
574     MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
575   unsigned Opc = MI.getOpcode();
576  if (isUncondBranchOpcode(Opc)) {
577  MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
578  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
579  .addImm(Pred[0].getImm())
580  .addReg(Pred[1].getReg());
581  return true;
582  }
583 
584  int PIdx = MI.findFirstPredOperandIdx();
585  if (PIdx != -1) {
586  MachineOperand &PMO = MI.getOperand(PIdx);
587  PMO.setImm(Pred[0].getImm());
588  MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
589 
590  // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
591  // IT block. This affects how they are printed.
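    // (For example, tADDi8 sets the flags and is printed as "adds" outside an
    // IT block, but the same encoding inside an IT block leaves CPSR alone and
    // is printed as plain "add".)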
592  const MCInstrDesc &MCID = MI.getDesc();
593     if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
594       assert(MCID.OpInfo[1].isOptionalDef() && "CPSR def isn't expected operand");
595  assert((MI.getOperand(1).isDead() ||
596  MI.getOperand(1).getReg() != ARM::CPSR) &&
597  "if conversion tried to stop defining used CPSR");
598  MI.getOperand(1).setReg(ARM::NoRegister);
599  }
600 
601  return true;
602  }
603  return false;
604 }
605 
606 bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
607                                          ArrayRef<MachineOperand> Pred2) const {
608  if (Pred1.size() > 2 || Pred2.size() > 2)
609  return false;
610 
611  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
612  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
613  if (CC1 == CC2)
614  return true;
615 
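  // CC1 subsumes CC2 when every condition-flag state that satisfies CC2 also
  // satisfies CC1; for example GE subsumes GT below, since anything that is
  // "greater than" is also "greater than or equal".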
616  switch (CC1) {
617  default:
618  return false;
619  case ARMCC::AL:
620  return true;
621  case ARMCC::HS:
622  return CC2 == ARMCC::HI;
623  case ARMCC::LS:
624  return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
625  case ARMCC::GE:
626  return CC2 == ARMCC::GT;
627  case ARMCC::LE:
628  return CC2 == ARMCC::LT;
629  }
630 }
631 
632 bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
633                                          std::vector<MachineOperand> &Pred,
634  bool SkipDead) const {
635  bool Found = false;
636  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
637  const MachineOperand &MO = MI.getOperand(i);
638  bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
639  bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
640  if (ClobbersCPSR || IsCPSR) {
641 
642  // Filter out T1 instructions that have a dead CPSR,
643  // allowing IT blocks to be generated containing T1 instructions
644  const MCInstrDesc &MCID = MI.getDesc();
645  if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
646  SkipDead)
647  continue;
648 
649  Pred.push_back(MO);
650  Found = true;
651  }
652  }
653 
654  return Found;
655 }
656 
657 static bool isCPSRDefined(const MachineInstr &MI) {
658   for (const auto &MO : MI.operands())
659  if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
660  return true;
661  return false;
662 }
663 
664 static bool isEligibleForITBlock(const MachineInstr *MI) {
665  switch (MI->getOpcode()) {
666  default: return true;
667  case ARM::tADC: // ADC (register) T1
668  case ARM::tADDi3: // ADD (immediate) T1
669  case ARM::tADDi8: // ADD (immediate) T2
670  case ARM::tADDrr: // ADD (register) T1
671  case ARM::tAND: // AND (register) T1
672  case ARM::tASRri: // ASR (immediate) T1
673  case ARM::tASRrr: // ASR (register) T1
674  case ARM::tBIC: // BIC (register) T1
675  case ARM::tEOR: // EOR (register) T1
676  case ARM::tLSLri: // LSL (immediate) T1
677  case ARM::tLSLrr: // LSL (register) T1
678  case ARM::tLSRri: // LSR (immediate) T1
679  case ARM::tLSRrr: // LSR (register) T1
680  case ARM::tMUL: // MUL T1
681  case ARM::tMVN: // MVN (register) T1
682  case ARM::tORR: // ORR (register) T1
683  case ARM::tROR: // ROR (register) T1
684  case ARM::tRSB: // RSB (immediate) T1
685  case ARM::tSBC: // SBC (register) T1
686  case ARM::tSUBi3: // SUB (immediate) T1
687  case ARM::tSUBi8: // SUB (immediate) T2
688  case ARM::tSUBrr: // SUB (register) T1
689     return !isCPSRDefined(*MI);
690   }
691 }
692 
693 /// isPredicable - Return true if the specified instruction can be predicated.
694 /// By default, this returns true for every instruction with a
695 /// PredicateOperand.
696 bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
697   if (!MI.isPredicable())
698  return false;
699 
700  if (MI.isBundle())
701  return false;
702 
703  if (!isEligibleForITBlock(&MI))
704  return false;
705 
706  const MachineFunction *MF = MI.getParent()->getParent();
707  const ARMFunctionInfo *AFI =
708  MF->getInfo<ARMFunctionInfo>();
709 
710  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
711  // In their ARM encoding, they can't be encoded in a conditional form.
712  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
713  return false;
714 
715  // Make indirect control flow changes unpredicable when SLS mitigation is
716  // enabled.
717  const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
718  if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
719  return false;
720  if (ST.hardenSlsBlr() && isIndirectCall(MI))
721  return false;
722 
723  if (AFI->isThumb2Function()) {
724  if (getSubtarget().restrictIT())
725  return isV8EligibleForIT(&MI);
726  }
727 
728  return true;
729 }
730 
731 namespace llvm {
732 
733 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
734  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
735  const MachineOperand &MO = MI->getOperand(i);
736  if (!MO.isReg() || MO.isUndef() || MO.isUse())
737  continue;
738  if (MO.getReg() != ARM::CPSR)
739  continue;
740  if (!MO.isDead())
741  return false;
742  }
743  // all definitions of CPSR are dead
744  return true;
745 }
746 
747 } // end namespace llvm
748 
749 /// GetInstSize - Return the size of the specified MachineInstr.
750 ///
751 unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
752   const MachineBasicBlock &MBB = *MI.getParent();
753  const MachineFunction *MF = MBB.getParent();
754  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
755 
756  const MCInstrDesc &MCID = MI.getDesc();
757  if (MCID.getSize())
758  return MCID.getSize();
759 
760  switch (MI.getOpcode()) {
761  default:
762  // pseudo-instruction sizes are zero.
763  return 0;
764  case TargetOpcode::BUNDLE:
765  return getInstBundleLength(MI);
766  case ARM::MOVi16_ga_pcrel:
767  case ARM::MOVTi16_ga_pcrel:
768  case ARM::t2MOVi16_ga_pcrel:
769  case ARM::t2MOVTi16_ga_pcrel:
770  return 4;
771  case ARM::MOVi32imm:
772  case ARM::t2MOVi32imm:
773  return 8;
774  case ARM::CONSTPOOL_ENTRY:
775  case ARM::JUMPTABLE_INSTS:
776  case ARM::JUMPTABLE_ADDRS:
777  case ARM::JUMPTABLE_TBB:
778  case ARM::JUMPTABLE_TBH:
779  // If this machine instr is a constant pool entry, its size is recorded as
780  // operand #2.
781  return MI.getOperand(2).getImm();
782  case ARM::Int_eh_sjlj_longjmp:
783  return 16;
784  case ARM::tInt_eh_sjlj_longjmp:
785  return 10;
786  case ARM::tInt_WIN_eh_sjlj_longjmp:
787  return 12;
788  case ARM::Int_eh_sjlj_setjmp:
789  case ARM::Int_eh_sjlj_setjmp_nofp:
790  return 20;
791  case ARM::tInt_eh_sjlj_setjmp:
792  case ARM::t2Int_eh_sjlj_setjmp:
793  case ARM::t2Int_eh_sjlj_setjmp_nofp:
794  return 12;
795  case ARM::SPACE:
796  return MI.getOperand(1).getImm();
797  case ARM::INLINEASM:
798  case ARM::INLINEASM_BR: {
799  // If this machine instr is an inline asm, measure it.
800  unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
801  if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
802  Size = alignTo(Size, 4);
803  return Size;
804  }
805  case ARM::SpeculationBarrierISBDSBEndBB:
806  case ARM::t2SpeculationBarrierISBDSBEndBB:
807  // This gets lowered to 2 4-byte instructions.
808  return 8;
809  case ARM::SpeculationBarrierSBEndBB:
810  case ARM::t2SpeculationBarrierSBEndBB:
811     // This gets lowered to 1 4-byte instruction.
812  return 4;
813  }
814 }
815 
816 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
817  unsigned Size = 0;
818   MachineBasicBlock::const_instr_iterator I = MI.getIterator();
819   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
820  while (++I != E && I->isInsideBundle()) {
821  assert(!I->isBundle() && "No nested bundle!");
822     Size += getInstSizeInBytes(*I);
823   }
824  return Size;
825 }
826 
827 void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
828                                     MachineBasicBlock::iterator I,
829                                     unsigned DestReg, bool KillSrc,
830  const ARMSubtarget &Subtarget) const {
831  unsigned Opc = Subtarget.isThumb()
832  ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
833  : ARM::MRS;
834 
835  MachineInstrBuilder MIB =
836  BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
837 
838  // There is only 1 A/R class MRS instruction, and it always refers to
839  // APSR. However, there are lots of other possibilities on M-class cores.
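  // (0x800 is the SYSm/mask immediate that presumably selects APSR with the
  // "nzcvq" bits for the M-class MRS/MSR forms used here.)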
840  if (Subtarget.isMClass())
841  MIB.addImm(0x800);
842 
843  MIB.add(predOps(ARMCC::AL))
844  .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
845 }
846 
847 void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
848                                   MachineBasicBlock::iterator I,
849                                   unsigned SrcReg, bool KillSrc,
850  const ARMSubtarget &Subtarget) const {
851  unsigned Opc = Subtarget.isThumb()
852  ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
853  : ARM::MSR;
854 
855  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
856 
857  if (Subtarget.isMClass())
858  MIB.addImm(0x800);
859  else
860  MIB.addImm(8);
861 
862  MIB.addReg(SrcReg, getKillRegState(KillSrc))
863      .add(predOps(ARMCC::AL))
864      .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
865 }
866 
867 void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
868   MIB.addImm(ARMVCC::None);
869  MIB.addReg(0);
870  MIB.addReg(0); // tp_reg
871 }
872 
873 void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
874                                       Register DestReg) {
875   addUnpredicatedMveVpredNOp(MIB);
876   MIB.addReg(DestReg, RegState::Undef);
877 }
878 
879 void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
880   MIB.addImm(Cond);
881  MIB.addReg(ARM::VPR, RegState::Implicit);
882  MIB.addReg(0); // tp_reg
883 }
884 
885 void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
886                                     unsigned Cond, unsigned Inactive) {
887   addPredicatedMveVpredNOp(MIB, Cond);
888   MIB.addReg(Inactive);
889 }
890 
891 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
892                                    MachineBasicBlock::iterator I,
893                                    const DebugLoc &DL, MCRegister DestReg,
894  MCRegister SrcReg, bool KillSrc) const {
895  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
896  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
897 
898  if (GPRDest && GPRSrc) {
899  BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
900  .addReg(SrcReg, getKillRegState(KillSrc))
901         .add(predOps(ARMCC::AL))
902         .add(condCodeOp());
903  return;
904  }
905 
906  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
907  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
908 
909  unsigned Opc = 0;
910  if (SPRDest && SPRSrc)
911  Opc = ARM::VMOVS;
912  else if (GPRDest && SPRSrc)
913  Opc = ARM::VMOVRS;
914  else if (SPRDest && GPRSrc)
915  Opc = ARM::VMOVSR;
916  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
917  Opc = ARM::VMOVD;
918  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
919  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
920 
921  if (Opc) {
922  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
923  MIB.addReg(SrcReg, getKillRegState(KillSrc));
924  if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
925  MIB.addReg(SrcReg, getKillRegState(KillSrc));
926  if (Opc == ARM::MVE_VORR)
927  addUnpredicatedMveVpredROp(MIB, DestReg);
928  else
929  MIB.add(predOps(ARMCC::AL));
930  return;
931  }
932 
933  // Handle register classes that require multiple instructions.
934  unsigned BeginIdx = 0;
935  unsigned SubRegs = 0;
936  int Spacing = 1;
937 
938  // Use VORRq when possible.
939  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
940  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
941  BeginIdx = ARM::qsub_0;
942  SubRegs = 2;
943  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
944  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
945  BeginIdx = ARM::qsub_0;
946  SubRegs = 4;
947  // Fall back to VMOVD.
948  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
949  Opc = ARM::VMOVD;
950  BeginIdx = ARM::dsub_0;
951  SubRegs = 2;
952  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
953  Opc = ARM::VMOVD;
954  BeginIdx = ARM::dsub_0;
955  SubRegs = 3;
956  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
957  Opc = ARM::VMOVD;
958  BeginIdx = ARM::dsub_0;
959  SubRegs = 4;
960  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
961  Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
962  BeginIdx = ARM::gsub_0;
963  SubRegs = 2;
964  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
965  Opc = ARM::VMOVD;
966  BeginIdx = ARM::dsub_0;
967  SubRegs = 2;
968  Spacing = 2;
969  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
970  Opc = ARM::VMOVD;
971  BeginIdx = ARM::dsub_0;
972  SubRegs = 3;
973  Spacing = 2;
974  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
975  Opc = ARM::VMOVD;
976  BeginIdx = ARM::dsub_0;
977  SubRegs = 4;
978  Spacing = 2;
979  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
980  !Subtarget.hasFP64()) {
981  Opc = ARM::VMOVS;
982  BeginIdx = ARM::ssub_0;
983  SubRegs = 2;
984  } else if (SrcReg == ARM::CPSR) {
985  copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
986  return;
987  } else if (DestReg == ARM::CPSR) {
988  copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
989  return;
990  } else if (DestReg == ARM::VPR) {
991  assert(ARM::GPRRegClass.contains(SrcReg));
992  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
993  .addReg(SrcReg, getKillRegState(KillSrc))
994  .add(predOps(ARMCC::AL));
995  return;
996  } else if (SrcReg == ARM::VPR) {
997  assert(ARM::GPRRegClass.contains(DestReg));
998  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
999  .addReg(SrcReg, getKillRegState(KillSrc))
1000  .add(predOps(ARMCC::AL));
1001  return;
1002  } else if (DestReg == ARM::FPSCR_NZCV) {
1003  assert(ARM::GPRRegClass.contains(SrcReg));
1004  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
1005  .addReg(SrcReg, getKillRegState(KillSrc))
1006  .add(predOps(ARMCC::AL));
1007  return;
1008  } else if (SrcReg == ARM::FPSCR_NZCV) {
1009  assert(ARM::GPRRegClass.contains(DestReg));
1010  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
1011  .addReg(SrcReg, getKillRegState(KillSrc))
1012  .add(predOps(ARMCC::AL));
1013  return;
1014  }
1015 
1016  assert(Opc && "Impossible reg-to-reg copy");
1017 
1018   const TargetRegisterInfo *TRI = &getRegisterInfo();
1019   MachineInstrBuilder Mov;
1020 
1021  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
1022  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
1023  BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
1024  Spacing = -Spacing;
1025  }
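  // For example, copying q0_q1 into q1_q2 in forward order would overwrite q1
  // before it is read as part of the source, so the per-subregister copies are
  // emitted in reverse (q2 <- q1 first, then q1 <- q0).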
1026 #ifndef NDEBUG
1027  SmallSet<unsigned, 4> DstRegs;
1028 #endif
1029  for (unsigned i = 0; i != SubRegs; ++i) {
1030  Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
1031  Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
1032  assert(Dst && Src && "Bad sub-register");
1033 #ifndef NDEBUG
1034  assert(!DstRegs.count(Src) && "destructive vector copy");
1035  DstRegs.insert(Dst);
1036 #endif
1037  Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
1038  // VORR (NEON or MVE) takes two source operands.
1039  if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
1040  Mov.addReg(Src);
1041  }
1042  // MVE VORR takes predicate operands in place of an ordinary condition.
1043  if (Opc == ARM::MVE_VORR)
1044  addUnpredicatedMveVpredROp(Mov, Dst);
1045  else
1046  Mov = Mov.add(predOps(ARMCC::AL));
1047  // MOVr can set CC.
1048  if (Opc == ARM::MOVr)
1049  Mov = Mov.add(condCodeOp());
1050  }
1051  // Add implicit super-register defs and kills to the last instruction.
1052  Mov->addRegisterDefined(DestReg, TRI);
1053  if (KillSrc)
1054  Mov->addRegisterKilled(SrcReg, TRI);
1055 }
1056 
1057 Optional<DestSourcePair>
1058 ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1059   // VMOVRRD is also a copy instruction, but it requires special
1060   // handling: it is a more complex form of copy, so it is not
1061   // considered here. To recognize such instructions, the
1062   // isExtractSubregLike MI interface function could be used.
1063   //
1064   // VORRq is considered a move only if its two inputs are
1065   // the same register.
1066  if (!MI.isMoveReg() ||
1067  (MI.getOpcode() == ARM::VORRq &&
1068  MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
1069  return None;
1070  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1071 }
1072 
1073 Optional<ParamLoadedValue>
1074 ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
1075                                       Register Reg) const {
1076  if (auto DstSrcPair = isCopyInstrImpl(MI)) {
1077  Register DstReg = DstSrcPair->Destination->getReg();
1078 
1079  // TODO: We don't handle cases where the forwarding reg is narrower/wider
1080  // than the copy registers. Consider for example:
1081  //
1082  // s16 = VMOVS s0
1083  // s17 = VMOVS s1
1084  // call @callee(d0)
1085  //
1086  // We'd like to describe the call site value of d0 as d8, but this requires
1087  // gathering and merging the descriptions for the two VMOVS instructions.
1088  //
1089  // We also don't handle the reverse situation, where the forwarding reg is
1090  // narrower than the copy destination:
1091  //
1092  // d8 = VMOVD d0
1093  // call @callee(s1)
1094  //
1095  // We need to produce a fragment description (the call site value of s1 is
1096  // /not/ just d8).
1097  if (DstReg != Reg)
1098  return None;
1099  }
1100   return TargetInstrInfo::describeLoadedValue(MI, Reg);
1101 }
1102 
1103 const MachineInstrBuilder &
1104 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
1105                           unsigned SubIdx, unsigned State,
1106  const TargetRegisterInfo *TRI) const {
1107  if (!SubIdx)
1108  return MIB.addReg(Reg, State);
1109 
1109 
1110   if (Register::isPhysicalRegister(Reg))
1111     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1112  return MIB.addReg(Reg, State, SubIdx);
1113 }
1114 
1115 void ARMBaseInstrInfo::
1116 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1117                     Register SrcReg, bool isKill, int FI,
1118  const TargetRegisterClass *RC,
1119  const TargetRegisterInfo *TRI) const {
1120  MachineFunction &MF = *MBB.getParent();
1121  MachineFrameInfo &MFI = MF.getFrameInfo();
1122  Align Alignment = MFI.getObjectAlign(FI);
1123 
1124   MachineMemOperand *MMO = MF.getMachineMemOperand(
1125       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
1126       MFI.getObjectSize(FI), Alignment);
1127 
1128  switch (TRI->getSpillSize(*RC)) {
1129  case 2:
1130  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1131  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
1132  .addReg(SrcReg, getKillRegState(isKill))
1133  .addFrameIndex(FI)
1134  .addImm(0)
1135  .addMemOperand(MMO)
1136  .add(predOps(ARMCC::AL));
1137  } else
1138  llvm_unreachable("Unknown reg class!");
1139  break;
1140  case 4:
1141  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1142  BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1143  .addReg(SrcReg, getKillRegState(isKill))
1144  .addFrameIndex(FI)
1145  .addImm(0)
1146  .addMemOperand(MMO)
1147  .add(predOps(ARMCC::AL));
1148  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1149  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1150  .addReg(SrcReg, getKillRegState(isKill))
1151  .addFrameIndex(FI)
1152  .addImm(0)
1153  .addMemOperand(MMO)
1154  .add(predOps(ARMCC::AL));
1155  } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1156  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
1157  .addReg(SrcReg, getKillRegState(isKill))
1158  .addFrameIndex(FI)
1159  .addImm(0)
1160  .addMemOperand(MMO)
1161  .add(predOps(ARMCC::AL));
1162  } else
1163  llvm_unreachable("Unknown reg class!");
1164  break;
1165  case 8:
1166  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1167  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1168  .addReg(SrcReg, getKillRegState(isKill))
1169  .addFrameIndex(FI)
1170  .addImm(0)
1171  .addMemOperand(MMO)
1172  .add(predOps(ARMCC::AL));
1173  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1174  if (Subtarget.hasV5TEOps()) {
1175       MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1176       AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1177  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1178  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1179  .add(predOps(ARMCC::AL));
1180  } else {
1181  // Fallback to STM instruction, which has existed since the dawn of
1182  // time.
1183  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1184  .addFrameIndex(FI)
1185  .addMemOperand(MMO)
1186  .add(predOps(ARMCC::AL));
1187  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1188  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1189  }
1190  } else
1191  llvm_unreachable("Unknown reg class!");
1192  break;
1193  case 16:
1194  if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1195  // Use aligned spills if the stack can be realigned.
1196  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1197  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1198  .addFrameIndex(FI)
1199  .addImm(16)
1200  .addReg(SrcReg, getKillRegState(isKill))
1201  .addMemOperand(MMO)
1202  .add(predOps(ARMCC::AL));
1203  } else {
1204  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1205  .addReg(SrcReg, getKillRegState(isKill))
1206  .addFrameIndex(FI)
1207  .addMemOperand(MMO)
1208  .add(predOps(ARMCC::AL));
1209  }
1210  } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1211  Subtarget.hasMVEIntegerOps()) {
1212  auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1213  MIB.addReg(SrcReg, getKillRegState(isKill))
1214  .addFrameIndex(FI)
1215  .addImm(0)
1216  .addMemOperand(MMO);
1217       addUnpredicatedMveVpredNOp(MIB);
1218     } else
1219  llvm_unreachable("Unknown reg class!");
1220  break;
1221  case 24:
1222  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1223  // Use aligned spills if the stack can be realigned.
1224  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1225  Subtarget.hasNEON()) {
1226  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1227  .addFrameIndex(FI)
1228  .addImm(16)
1229  .addReg(SrcReg, getKillRegState(isKill))
1230  .addMemOperand(MMO)
1231  .add(predOps(ARMCC::AL));
1232  } else {
1233         MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1234                                           get(ARM::VSTMDIA))
1235  .addFrameIndex(FI)
1236  .add(predOps(ARMCC::AL))
1237  .addMemOperand(MMO);
1238  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1239  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1240  AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1241  }
1242  } else
1243  llvm_unreachable("Unknown reg class!");
1244  break;
1245  case 32:
1246  if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1247  ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1248  ARM::DQuadRegClass.hasSubClassEq(RC)) {
1249  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1250  Subtarget.hasNEON()) {
1251  // FIXME: It's possible to only store part of the QQ register if the
1252  // spilled def has a sub-register index.
1253  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1254  .addFrameIndex(FI)
1255  .addImm(16)
1256  .addReg(SrcReg, getKillRegState(isKill))
1257  .addMemOperand(MMO)
1258  .add(predOps(ARMCC::AL));
1259  } else if (Subtarget.hasMVEIntegerOps()) {
1260  BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1261  .addReg(SrcReg, getKillRegState(isKill))
1262  .addFrameIndex(FI)
1263  .addMemOperand(MMO);
1264  } else {
1265       MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1266                                         get(ARM::VSTMDIA))
1267  .addFrameIndex(FI)
1268  .add(predOps(ARMCC::AL))
1269  .addMemOperand(MMO);
1270  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1271  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1272  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1273  AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1274  }
1275  } else
1276  llvm_unreachable("Unknown reg class!");
1277  break;
1278  case 64:
1279  if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1280  Subtarget.hasMVEIntegerOps()) {
1281  BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1282  .addReg(SrcReg, getKillRegState(isKill))
1283  .addFrameIndex(FI)
1284  .addMemOperand(MMO);
1285  } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1286  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1287  .addFrameIndex(FI)
1288  .add(predOps(ARMCC::AL))
1289  .addMemOperand(MMO);
1290  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1291  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1292  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1293  MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1294  MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1295  MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1296  MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1297  AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1298  } else
1299  llvm_unreachable("Unknown reg class!");
1300  break;
1301  default:
1302  llvm_unreachable("Unknown reg class!");
1303  }
1304 }
1305 
1306 unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1307                                               int &FrameIndex) const {
1308  switch (MI.getOpcode()) {
1309  default: break;
1310  case ARM::STRrs:
1311  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1312  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1313  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1314  MI.getOperand(3).getImm() == 0) {
1315  FrameIndex = MI.getOperand(1).getIndex();
1316  return MI.getOperand(0).getReg();
1317  }
1318  break;
1319  case ARM::STRi12:
1320  case ARM::t2STRi12:
1321  case ARM::tSTRspi:
1322  case ARM::VSTRD:
1323  case ARM::VSTRS:
1324  case ARM::VSTR_P0_off:
1325  case ARM::MVE_VSTRWU32:
1326  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1327  MI.getOperand(2).getImm() == 0) {
1328  FrameIndex = MI.getOperand(1).getIndex();
1329  return MI.getOperand(0).getReg();
1330  }
1331  break;
1332  case ARM::VST1q64:
1333  case ARM::VST1d64TPseudo:
1334  case ARM::VST1d64QPseudo:
1335  if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1336  FrameIndex = MI.getOperand(0).getIndex();
1337  return MI.getOperand(2).getReg();
1338  }
1339  break;
1340  case ARM::VSTMQIA:
1341  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1342  FrameIndex = MI.getOperand(1).getIndex();
1343  return MI.getOperand(0).getReg();
1344  }
1345  break;
1346  case ARM::MQQPRStore:
1347  case ARM::MQQQQPRStore:
1348  if (MI.getOperand(1).isFI()) {
1349  FrameIndex = MI.getOperand(1).getIndex();
1350  return MI.getOperand(0).getReg();
1351  }
1352  break;
1353  }
1354 
1355  return 0;
1356 }
1357 
1358 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1359                                                     int &FrameIndex) const {
1360   SmallVector<const MachineMemOperand *, 1> Accesses;
1361   if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1362  Accesses.size() == 1) {
1363  FrameIndex =
1364  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1365  ->getFrameIndex();
1366  return true;
1367  }
1368  return false;
1369 }
1370 
1371 void ARMBaseInstrInfo::
1372 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1373                      Register DestReg, int FI,
1374  const TargetRegisterClass *RC,
1375  const TargetRegisterInfo *TRI) const {
1376  DebugLoc DL;
1377  if (I != MBB.end()) DL = I->getDebugLoc();
1378  MachineFunction &MF = *MBB.getParent();
1379  MachineFrameInfo &MFI = MF.getFrameInfo();
1380  const Align Alignment = MFI.getObjectAlign(FI);
1381   MachineMemOperand *MMO = MF.getMachineMemOperand(
1382       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1383       MFI.getObjectSize(FI), Alignment);
1384 
1385  switch (TRI->getSpillSize(*RC)) {
1386  case 2:
1387  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1388  BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1389  .addFrameIndex(FI)
1390  .addImm(0)
1391  .addMemOperand(MMO)
1392  .add(predOps(ARMCC::AL));
1393  } else
1394  llvm_unreachable("Unknown reg class!");
1395  break;
1396  case 4:
1397  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1398  BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1399  .addFrameIndex(FI)
1400  .addImm(0)
1401  .addMemOperand(MMO)
1402  .add(predOps(ARMCC::AL));
1403  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1404  BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1405  .addFrameIndex(FI)
1406  .addImm(0)
1407  .addMemOperand(MMO)
1408  .add(predOps(ARMCC::AL));
1409  } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1410  BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1411  .addFrameIndex(FI)
1412  .addImm(0)
1413  .addMemOperand(MMO)
1414  .add(predOps(ARMCC::AL));
1415  } else
1416  llvm_unreachable("Unknown reg class!");
1417  break;
1418  case 8:
1419  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1420  BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1421  .addFrameIndex(FI)
1422  .addImm(0)
1423  .addMemOperand(MMO)
1424  .add(predOps(ARMCC::AL));
1425  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1426  MachineInstrBuilder MIB;
1427 
1428  if (Subtarget.hasV5TEOps()) {
1429  MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1430  AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1431  AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1432  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1433  .add(predOps(ARMCC::AL));
1434  } else {
1435  // Fallback to LDM instruction, which has existed since the dawn of
1436  // time.
1437  MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1438  .addFrameIndex(FI)
1439  .addMemOperand(MMO)
1440  .add(predOps(ARMCC::AL));
1441  MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1442  MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1443  }
1444 
1445  if (Register::isPhysicalRegister(DestReg))
1446  MIB.addReg(DestReg, RegState::ImplicitDefine);
1447  } else
1448  llvm_unreachable("Unknown reg class!");
1449  break;
1450  case 16:
1451  if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1452  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1453  BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1454  .addFrameIndex(FI)
1455  .addImm(16)
1456  .addMemOperand(MMO)
1457  .add(predOps(ARMCC::AL));
1458  } else {
1459  BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1460  .addFrameIndex(FI)
1461  .addMemOperand(MMO)
1462  .add(predOps(ARMCC::AL));
1463  }
1464  } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1465  Subtarget.hasMVEIntegerOps()) {
1466  auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1467  MIB.addFrameIndex(FI)
1468  .addImm(0)
1469  .addMemOperand(MMO);
1470     addUnpredicatedMveVpredNOp(MIB);
1471   } else
1472  llvm_unreachable("Unknown reg class!");
1473  break;
1474  case 24:
1475  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1476  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1477  Subtarget.hasNEON()) {
1478  BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1479  .addFrameIndex(FI)
1480  .addImm(16)
1481  .addMemOperand(MMO)
1482  .add(predOps(ARMCC::AL));
1483  } else {
1484  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1485  .addFrameIndex(FI)
1486  .addMemOperand(MMO)
1487  .add(predOps(ARMCC::AL));
1488  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1489  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1490  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1491  if (Register::isPhysicalRegister(DestReg))
1492  MIB.addReg(DestReg, RegState::ImplicitDefine);
1493  }
1494  } else
1495  llvm_unreachable("Unknown reg class!");
1496  break;
1497  case 32:
1498  if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1499  ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1500  ARM::DQuadRegClass.hasSubClassEq(RC)) {
1501  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1502  Subtarget.hasNEON()) {
1503  BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1504  .addFrameIndex(FI)
1505  .addImm(16)
1506  .addMemOperand(MMO)
1507  .add(predOps(ARMCC::AL));
1508  } else if (Subtarget.hasMVEIntegerOps()) {
1509  BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1510  .addFrameIndex(FI)
1511  .addMemOperand(MMO);
1512  } else {
1513  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1514  .addFrameIndex(FI)
1515  .add(predOps(ARMCC::AL))
1516  .addMemOperand(MMO);
1517  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1518  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1519  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1520  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1521  if (Register::isPhysicalRegister(DestReg))
1522  MIB.addReg(DestReg, RegState::ImplicitDefine);
1523  }
1524  } else
1525  llvm_unreachable("Unknown reg class!");
1526  break;
1527  case 64:
1528  if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1529  Subtarget.hasMVEIntegerOps()) {
1530  BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1531  .addFrameIndex(FI)
1532  .addMemOperand(MMO);
1533  } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1534  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1535  .addFrameIndex(FI)
1536  .add(predOps(ARMCC::AL))
1537  .addMemOperand(MMO);
1538  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1539  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1540  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1541  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1542  MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1543  MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1544  MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1545  MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1546  if (Register::isPhysicalRegister(DestReg))
1547  MIB.addReg(DestReg, RegState::ImplicitDefine);
1548  } else
1549  llvm_unreachable("Unknown reg class!");
1550  break;
1551  default:
1552  llvm_unreachable("Unknown regclass!");
1553  }
1554 }
1555 
1556 unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1557                                                int &FrameIndex) const {
1558  switch (MI.getOpcode()) {
1559  default: break;
1560  case ARM::LDRrs:
1561  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1562  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1563  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1564  MI.getOperand(3).getImm() == 0) {
1565  FrameIndex = MI.getOperand(1).getIndex();
1566  return MI.getOperand(0).getReg();
1567  }
1568  break;
1569  case ARM::LDRi12:
1570  case ARM::t2LDRi12:
1571  case ARM::tLDRspi:
1572  case ARM::VLDRD:
1573  case ARM::VLDRS:
1574  case ARM::VLDR_P0_off:
1575  case ARM::MVE_VLDRWU32:
1576  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1577  MI.getOperand(2).getImm() == 0) {
1578  FrameIndex = MI.getOperand(1).getIndex();
1579  return MI.getOperand(0).getReg();
1580  }
1581  break;
1582  case ARM::VLD1q64:
1583  case ARM::VLD1d8TPseudo:
1584  case ARM::VLD1d16TPseudo:
1585  case ARM::VLD1d32TPseudo:
1586  case ARM::VLD1d64TPseudo:
1587  case ARM::VLD1d8QPseudo:
1588  case ARM::VLD1d16QPseudo:
1589  case ARM::VLD1d32QPseudo:
1590  case ARM::VLD1d64QPseudo:
1591  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1592  FrameIndex = MI.getOperand(1).getIndex();
1593  return MI.getOperand(0).getReg();
1594  }
1595  break;
1596  case ARM::VLDMQIA:
1597  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1598  FrameIndex = MI.getOperand(1).getIndex();
1599  return MI.getOperand(0).getReg();
1600  }
1601  break;
1602  case ARM::MQQPRLoad:
1603  case ARM::MQQQQPRLoad:
1604  if (MI.getOperand(1).isFI()) {
1605  FrameIndex = MI.getOperand(1).getIndex();
1606  return MI.getOperand(0).getReg();
1607  }
1608  break;
1609  }
1610 
1611  return 0;
1612 }
1613 
1614 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1615                                                      int &FrameIndex) const {
1616   SmallVector<const MachineMemOperand *, 1> Accesses;
1617   if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1618  Accesses.size() == 1) {
1619  FrameIndex =
1620  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1621  ->getFrameIndex();
1622  return true;
1623  }
1624  return false;
1625 }
1626 
1627 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1628 /// depending on whether the result is used.
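/// For example, a MEMCPY pseudo with four scratch registers is roughly lowered
/// to "ldmia r1!, {r4-r7}" followed by "stmia r0!, {r4-r7}", with the
/// writeback ("_UPD") forms used only when the updated pointers are needed
/// afterwards (or on Thumb1, where the writeback forms are the only option).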
1629 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1630  bool isThumb1 = Subtarget.isThumb1Only();
1631  bool isThumb2 = Subtarget.isThumb2();
1632  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1633 
1634  DebugLoc dl = MI->getDebugLoc();
1635  MachineBasicBlock *BB = MI->getParent();
1636 
1637  MachineInstrBuilder LDM, STM;
1638  if (isThumb1 || !MI->getOperand(1).isDead()) {
1639  MachineOperand LDWb(MI->getOperand(1));
1640  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1641  : isThumb1 ? ARM::tLDMIA_UPD
1642  : ARM::LDMIA_UPD))
1643  .add(LDWb);
1644  } else {
1645  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1646  }
1647 
1648  if (isThumb1 || !MI->getOperand(0).isDead()) {
1649  MachineOperand STWb(MI->getOperand(0));
1650  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1651  : isThumb1 ? ARM::tSTMIA_UPD
1652  : ARM::STMIA_UPD))
1653  .add(STWb);
1654  } else {
1655  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1656  }
1657 
1658  MachineOperand LDBase(MI->getOperand(3));
1659  LDM.add(LDBase).add(predOps(ARMCC::AL));
1660 
1661  MachineOperand STBase(MI->getOperand(2));
1662  STM.add(STBase).add(predOps(ARMCC::AL));
1663 
1664  // Sort the scratch registers into ascending order.
1665   const TargetRegisterInfo &TRI = getRegisterInfo();
1666   SmallVector<unsigned, 6> ScratchRegs;
1667  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
1668  ScratchRegs.push_back(MI->getOperand(I).getReg());
1669  llvm::sort(ScratchRegs,
1670  [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1671  return TRI.getEncodingValue(Reg1) <
1672  TRI.getEncodingValue(Reg2);
1673  });
1674 
1675  for (const auto &Reg : ScratchRegs) {
1676  LDM.addReg(Reg, RegState::Define);
1677  STM.addReg(Reg, RegState::Kill);
1678  }
1679 
1680  BB->erase(MI);
1681 }
1682 
1683 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1684   if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1685  assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
1686  "LOAD_STACK_GUARD currently supported only for MachO.");
1687  expandLoadStackGuard(MI);
1688  MI.getParent()->erase(MI);
1689  return true;
1690  }
1691 
1692  if (MI.getOpcode() == ARM::MEMCPY) {
1693  expandMEMCPY(MI);
1694  return true;
1695  }
1696 
1697  // This hook gets to expand COPY instructions before they become
1698  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1699  // widened to VMOVD. We prefer the VMOVD when possible because it may be
1700  // changed into a VORR that can go down the NEON pipeline.
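  // For example, "%s0 = COPY %s2" can be rewritten as "%d0 = VMOVD %d1", with
  // implicit operands added below to record that only the low half (%s2) holds
  // a defined value.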
1701  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1702  return false;
1703 
1704  // Look for a copy between even S-registers. That is where we keep floats
1705  // when using NEON v2f32 instructions for f32 arithmetic.
1706  Register DstRegS = MI.getOperand(0).getReg();
1707  Register SrcRegS = MI.getOperand(1).getReg();
1708  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1709  return false;
1710 
1711   const TargetRegisterInfo *TRI = &getRegisterInfo();
1712   unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1713  &ARM::DPRRegClass);
1714  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1715  &ARM::DPRRegClass);
1716  if (!DstRegD || !SrcRegD)
1717  return false;
1718 
1719  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1720  // legal if the COPY already defines the full DstRegD, and it isn't a
1721  // sub-register insertion.
1722  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1723  return false;
1724 
1725  // A dead copy shouldn't show up here, but reject it just in case.
1726  if (MI.getOperand(0).isDead())
1727  return false;
1728 
1729  // All clear, widen the COPY.
1730  LLVM_DEBUG(dbgs() << "widening: " << MI);
1731  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1732 
1733  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1734  // or some other super-register.
1735  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1736  if (ImpDefIdx != -1)
1737  MI.RemoveOperand(ImpDefIdx);
1738 
1739  // Change the opcode and operands.
1740  MI.setDesc(get(ARM::VMOVD));
1741  MI.getOperand(0).setReg(DstRegD);
1742  MI.getOperand(1).setReg(SrcRegD);
1743  MIB.add(predOps(ARMCC::AL));
1744 
1745  // We are now reading SrcRegD instead of SrcRegS. This may upset the
1746  // register scavenger and machine verifier, so we need to indicate that we
1747  // are reading an undefined value from SrcRegD, but a proper value from
1748  // SrcRegS.
1749  MI.getOperand(1).setIsUndef();
1750  MIB.addReg(SrcRegS, RegState::Implicit);
1751 
1752  // SrcRegD may actually contain an unrelated value in the ssub_1
1753  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1754  if (MI.getOperand(1).isKill()) {
1755  MI.getOperand(1).setIsKill(false);
1756  MI.addRegisterKilled(SrcRegS, TRI, true);
1757  }
1758 
1759  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1760  return true;
1761 }
1762 
1763 /// Create a copy of a const pool value. Update CPI to the new index and return
1764 /// the label UID.
1765 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1766  MachineConstantPool *MCP = MF.getConstantPool();
1767  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1768 
1769  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1770  assert(MCPE.isMachineConstantPoolEntry() &&
1771  "Expecting a machine constantpool entry!");
1772  ARMConstantPoolValue *ACPV =
1773  static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1774 
1775  unsigned PCLabelId = AFI->createPICLabelUId();
1776  ARMConstantPoolValue *NewCPV = nullptr;
1777 
1778  // FIXME: The below assumes PIC relocation model and that the function
1779  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1780  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1781  // instructions, so that's probably OK, but is PIC always correct when
1782  // we get here?
1783  if (ACPV->isGlobalValue())
1784  NewCPV = ARMConstantPoolConstant::Create(
1785  cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1786  4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1787  else if (ACPV->isExtSymbol())
1788  NewCPV = ARMConstantPoolSymbol::
1789  Create(MF.getFunction().getContext(),
1790  cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1791  else if (ACPV->isBlockAddress())
1792  NewCPV = ARMConstantPoolConstant::
1793  Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1794  ARMCP::CPBlockAddress, 4);
1795  else if (ACPV->isLSDA())
1796  NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1797  ARMCP::CPLSDA, 4);
1798  else if (ACPV->isMachineBasicBlock())
1799  NewCPV = ARMConstantPoolMBB::
1800  Create(MF.getFunction().getContext(),
1801  cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1802  else
1803  llvm_unreachable("Unexpected ARM constantpool value type!!");
1804  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1805  return PCLabelId;
1806 }
1807 
1808 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1809  MachineBasicBlock::iterator I,
1810  Register DestReg, unsigned SubIdx,
1811  const MachineInstr &Orig,
1812  const TargetRegisterInfo &TRI) const {
1813  unsigned Opcode = Orig.getOpcode();
1814  switch (Opcode) {
1815  default: {
1816  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1817  MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1818  MBB.insert(I, MI);
1819  break;
1820  }
1821  case ARM::tLDRpci_pic:
1822  case ARM::t2LDRpci_pic: {
1823  MachineFunction &MF = *MBB.getParent();
1824  unsigned CPI = Orig.getOperand(1).getIndex();
1825  unsigned PCLabelId = duplicateCPV(MF, CPI);
1826  BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1827  .addConstantPoolIndex(CPI)
1828  .addImm(PCLabelId)
1829  .cloneMemRefs(Orig);
1830  break;
1831  }
1832  }
1833 }
1834 
1835 MachineInstr &
1836 ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1837  MachineBasicBlock::iterator InsertBefore,
1838  const MachineInstr &Orig) const {
1839  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1840  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1841  for (;;) {
1842  switch (I->getOpcode()) {
1843  case ARM::tLDRpci_pic:
1844  case ARM::t2LDRpci_pic: {
1845  MachineFunction &MF = *MBB.getParent();
1846  unsigned CPI = I->getOperand(1).getIndex();
1847  unsigned PCLabelId = duplicateCPV(MF, CPI);
1848  I->getOperand(1).setIndex(CPI);
1849  I->getOperand(2).setImm(PCLabelId);
1850  break;
1851  }
1852  }
1853  if (!I->isBundledWithSucc())
1854  break;
1855  ++I;
1856  }
1857  return Cloned;
1858 }
1859 
1860 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1861  const MachineInstr &MI1,
1862  const MachineRegisterInfo *MRI) const {
1863  unsigned Opcode = MI0.getOpcode();
1864  if (Opcode == ARM::t2LDRpci ||
1865  Opcode == ARM::t2LDRpci_pic ||
1866  Opcode == ARM::tLDRpci ||
1867  Opcode == ARM::tLDRpci_pic ||
1868  Opcode == ARM::LDRLIT_ga_pcrel ||
1869  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1870  Opcode == ARM::tLDRLIT_ga_pcrel ||
1871  Opcode == ARM::MOV_ga_pcrel ||
1872  Opcode == ARM::MOV_ga_pcrel_ldr ||
1873  Opcode == ARM::t2MOV_ga_pcrel) {
1874  if (MI1.getOpcode() != Opcode)
1875  return false;
1876  if (MI0.getNumOperands() != MI1.getNumOperands())
1877  return false;
1878 
1879  const MachineOperand &MO0 = MI0.getOperand(1);
1880  const MachineOperand &MO1 = MI1.getOperand(1);
1881  if (MO0.getOffset() != MO1.getOffset())
1882  return false;
1883 
1884  if (Opcode == ARM::LDRLIT_ga_pcrel ||
1885  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1886  Opcode == ARM::tLDRLIT_ga_pcrel ||
1887  Opcode == ARM::MOV_ga_pcrel ||
1888  Opcode == ARM::MOV_ga_pcrel_ldr ||
1889  Opcode == ARM::t2MOV_ga_pcrel)
1890  // Ignore the PC labels.
1891  return MO0.getGlobal() == MO1.getGlobal();
1892 
1893  const MachineFunction *MF = MI0.getParent()->getParent();
1894  const MachineConstantPool *MCP = MF->getConstantPool();
1895  int CPI0 = MO0.getIndex();
1896  int CPI1 = MO1.getIndex();
1897  const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1898  const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1899  bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1900  bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1901  if (isARMCP0 && isARMCP1) {
1902  ARMConstantPoolValue *ACPV0 =
1903  static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1904  ARMConstantPoolValue *ACPV1 =
1905  static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1906  return ACPV0->hasSameValue(ACPV1);
1907  } else if (!isARMCP0 && !isARMCP1) {
1908  return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1909  }
1910  return false;
1911  } else if (Opcode == ARM::PICLDR) {
1912  if (MI1.getOpcode() != Opcode)
1913  return false;
1914  if (MI0.getNumOperands() != MI1.getNumOperands())
1915  return false;
1916 
1917  Register Addr0 = MI0.getOperand(1).getReg();
1918  Register Addr1 = MI1.getOperand(1).getReg();
1919  if (Addr0 != Addr1) {
1920  if (!MRI || !Register::isVirtualRegister(Addr0) ||
1921  !Register::isVirtualRegister(Addr1))
1922  return false;
1923 
1924  // This assumes SSA form.
1925  MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1926  MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1927  // Check if the loaded values, e.g. a constantpool entry or a global
1928  // address, are the same.
1929  if (!produceSameValue(*Def0, *Def1, MRI))
1930  return false;
1931  }
1932 
1933  for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1934  // %12 = PICLDR %11, 0, 14, %noreg
1935  const MachineOperand &MO0 = MI0.getOperand(i);
1936  const MachineOperand &MO1 = MI1.getOperand(i);
1937  if (!MO0.isIdenticalTo(MO1))
1938  return false;
1939  }
1940  return true;
1941  }
1942 
1943  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1944 }
1945 
1946 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1947 /// determine if two loads are loading from the same base address. It should
1948 /// only return true if the base pointers are the same and the only differences
1949 /// between the two addresses is the offset. It also returns the offsets by
1950 /// reference.
1951 ///
1952 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1953 /// is permanently disabled.
1954 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1955  int64_t &Offset1,
1956  int64_t &Offset2) const {
1957  // Don't worry about Thumb: just ARM and Thumb2.
1958  if (Subtarget.isThumb1Only()) return false;
1959 
1960  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1961  return false;
1962 
1963  switch (Load1->getMachineOpcode()) {
1964  default:
1965  return false;
1966  case ARM::LDRi12:
1967  case ARM::LDRBi12:
1968  case ARM::LDRD:
1969  case ARM::LDRH:
1970  case ARM::LDRSB:
1971  case ARM::LDRSH:
1972  case ARM::VLDRD:
1973  case ARM::VLDRS:
1974  case ARM::t2LDRi8:
1975  case ARM::t2LDRBi8:
1976  case ARM::t2LDRDi8:
1977  case ARM::t2LDRSHi8:
1978  case ARM::t2LDRi12:
1979  case ARM::t2LDRBi12:
1980  case ARM::t2LDRSHi12:
1981  break;
1982  }
1983 
1984  switch (Load2->getMachineOpcode()) {
1985  default:
1986  return false;
1987  case ARM::LDRi12:
1988  case ARM::LDRBi12:
1989  case ARM::LDRD:
1990  case ARM::LDRH:
1991  case ARM::LDRSB:
1992  case ARM::LDRSH:
1993  case ARM::VLDRD:
1994  case ARM::VLDRS:
1995  case ARM::t2LDRi8:
1996  case ARM::t2LDRBi8:
1997  case ARM::t2LDRSHi8:
1998  case ARM::t2LDRi12:
1999  case ARM::t2LDRBi12:
2000  case ARM::t2LDRSHi12:
2001  break;
2002  }
2003 
2004  // Check if base addresses and chain operands match.
2005  if (Load1->getOperand(0) != Load2->getOperand(0) ||
2006  Load1->getOperand(4) != Load2->getOperand(4))
2007  return false;
2008 
2009  // Index should be Reg0.
2010  if (Load1->getOperand(3) != Load2->getOperand(3))
2011  return false;
2012 
2013  // Determine the offsets.
2014  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
2015  isa<ConstantSDNode>(Load2->getOperand(1))) {
2016  Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
2017  Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
2018  return true;
2019  }
2020 
2021  return false;
2022 }
2023 
2024 /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
2025 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
2026 /// be scheduled together. On some targets, if two loads are loading from
2027 /// addresses in the same cache line, it's better if they are scheduled
2028 /// together. This function takes two integers that represent the load offsets
2029 /// from the common base address. It returns true if it decides it's desirable
2030 /// to schedule the two loads together. "NumLoads" is the number of loads that
2031 /// have already been scheduled after Load1.
2032 ///
2033 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
2034 /// is permanently disabled.
2035 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
2036  int64_t Offset1, int64_t Offset2,
2037  unsigned NumLoads) const {
2038  // Don't worry about Thumb: just ARM and Thumb2.
2039  if (Subtarget.isThumb1Only()) return false;
2040 
2041  assert(Offset2 > Offset1);
2042 
2043  if ((Offset2 - Offset1) / 8 > 64)
2044  return false;
2045 
2046  // Check if the machine opcodes are different. If they are different
2047  // then we consider them to not be of the same base address,
2048  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
2049  // In this case, they are considered to be the same because they are different
2050  // encoding forms of the same basic instruction.
2051  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
2052  !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
2053  Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
2054  (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
2055  Load2->getMachineOpcode() == ARM::t2LDRBi8)))
2056  return false; // FIXME: overly conservative?
2057 
2058  // Four loads in a row should be sufficient.
2059  if (NumLoads >= 3)
2060  return false;
2061 
2062  return true;
2063 }
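// Illustrative example (offsets and registers are hypothetical): two t2LDRi12
// loads from [r0, #4] and [r0, #12] report Offset1 = 4 and Offset2 = 12 via
// areLoadsFromSameBasePtr; the gap is far below the roughly 512-byte cutoff
// checked above and fewer than three loads have already been scheduled after
// the first, so the two loads are clustered together.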
2064 
2065 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2066  const MachineBasicBlock *MBB,
2067  const MachineFunction &MF) const {
2068  // Debug info is never a scheduling boundary. It's necessary to be explicit
2069  // due to the special treatment of IT instructions below, otherwise a
2070  // dbg_value followed by an IT will result in the IT instruction being
2071  // considered a scheduling hazard, which is wrong. It should be the actual
2072  // instruction preceding the dbg_value instruction(s), just like it is
2073  // when debug info is not present.
2074  if (MI.isDebugInstr())
2075  return false;
2076 
2077  // Terminators and labels can't be scheduled around.
2078  if (MI.isTerminator() || MI.isPosition())
2079  return true;
2080 
2081  // INLINEASM_BR can jump to another block
2082  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
2083  return true;
2084 
2085  // Treat the start of the IT block as a scheduling boundary, but schedule
2086  // t2IT along with all instructions following it.
2087  // FIXME: This is a big hammer. But the alternative is to add all potential
2088  // true and anti dependencies to IT block instructions as implicit operands
2089  // to the t2IT instruction. The added compile time and complexity does not
2090  // seem worth it.
2091  MachineBasicBlock::const_iterator I = MI;
2092  // Make sure to skip any debug instructions
2093  while (++I != MBB->end() && I->isDebugInstr())
2094  ;
2095  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
2096  return true;
2097 
2098  // Don't attempt to schedule around any instruction that defines
2099  // a stack-oriented pointer, as it's unlikely to be profitable. This
2100  // saves compile time, because it doesn't require every single
2101  // stack slot reference to depend on the instruction that does the
2102  // modification.
2103  // Calls don't actually change the stack pointer, even if they have imp-defs.
2104  // No ARM calling conventions change the stack pointer. (X86 calling
2105  // conventions sometimes do).
2106  if (!MI.isCall() && MI.definesRegister(ARM::SP))
2107  return true;
2108 
2109  return false;
2110 }
2111 
2112 bool ARMBaseInstrInfo::
2113 isProfitableToIfCvt(MachineBasicBlock &MBB,
2114  unsigned NumCycles, unsigned ExtraPredCycles,
2115  BranchProbability Probability) const {
2116  if (!NumCycles)
2117  return false;
2118 
2119  // If we are optimizing for size, see if the branch in the predecessor can be
2120  // lowered to cbn?z by the constant island lowering pass, and return false if
2121  // so. This results in a shorter instruction sequence.
2122  if (MBB.getParent()->getFunction().hasOptSize()) {
2123  MachineBasicBlock *Pred = *MBB.pred_begin();
2124  if (!Pred->empty()) {
2125  MachineInstr *LastMI = &*Pred->rbegin();
2126  if (LastMI->getOpcode() == ARM::t2Bcc) {
2127  const TargetRegisterInfo *TRI = &getRegisterInfo();
2128  MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
2129  if (CmpMI)
2130  return false;
2131  }
2132  }
2133  }
2134  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
2135  MBB, 0, 0, Probability);
2136 }
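// Illustrative sketch (registers and block names are hypothetical): under
// optsize, a predecessor ending in
//   t2CMPri $r0, 0, 14, $noreg
//   t2Bcc %bb.2, 0 /* eq */, $cpsr
// can later be turned into a single tCBZ by the constant-island pass, so
// returning false here (keeping the branch) ends up smaller than predicating
// the block.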
2137 
2138 bool ARMBaseInstrInfo::
2139 isProfitableToIfCvt(MachineBasicBlock &TBB,
2140  unsigned TCycles, unsigned TExtra,
2141  MachineBasicBlock &FBB,
2142  unsigned FCycles, unsigned FExtra,
2143  BranchProbability Probability) const {
2144  if (!TCycles)
2145  return false;
2146 
2147  // In Thumb code we often end up trading one branch for an IT block, and
2148  // if the block has to be cloned, that can increase code size. Prevent
2149  // blocks with multiple predecessors from being if-converted to avoid
2150  // this cloning.
2151  if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
2152  if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
2153  return false;
2154  }
2155 
2156  // Attempt to estimate the relative costs of predication versus branching.
2157  // Here we scale up each component of UnpredCost to avoid precision issues when
2158  // scaling TCycles/FCycles by Probability.
2159  const unsigned ScalingUpFactor = 1024;
2160 
2161  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
2162  unsigned UnpredCost;
2163  if (!Subtarget.hasBranchPredictor()) {
2164  // When we don't have a branch predictor it's always cheaper to not take a
2165  // branch than take it, so we have to take that into account.
2166  unsigned NotTakenBranchCost = 1;
2167  unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2168  unsigned TUnpredCycles, FUnpredCycles;
2169  if (!FCycles) {
2170  // Triangle: TBB is the fallthrough
2171  TUnpredCycles = TCycles + NotTakenBranchCost;
2172  FUnpredCycles = TakenBranchCost;
2173  } else {
2174  // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2175  TUnpredCycles = TCycles + TakenBranchCost;
2176  FUnpredCycles = FCycles + NotTakenBranchCost;
2177  // The branch at the end of FBB will disappear when it's predicated, so
2178  // discount it from PredCost.
2179  PredCost -= 1 * ScalingUpFactor;
2180  }
2181  // The total cost is the cost of each path scaled by its probability
2182  unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2183  unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2184  UnpredCost = TUnpredCost + FUnpredCost;
2185  // When predicating, assume that the first IT can be folded away but later
2186  // ones cost one cycle each
2187  if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2188  PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2189  }
2190  } else {
2191  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2192  unsigned FUnpredCost =
2193  Probability.getCompl().scale(FCycles * ScalingUpFactor);
2194  UnpredCost = TUnpredCost + FUnpredCost;
2195  UnpredCost += 1 * ScalingUpFactor; // The branch itself
2196  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2197  }
2198 
2199  return PredCost <= UnpredCost;
2200 }
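// Worked example with made-up numbers (no branch predictor, triangle shape):
// TCycles = 2, FCycles = 0, TExtra = FExtra = 0, probability = 1/2 and a
// misprediction penalty of 4:
//   PredCost      = (2 + 0 + 0 + 0) * 1024              = 2048
//   TUnpredCycles = 2 + 1 = 3, FUnpredCycles = 4
//   UnpredCost    = 1/2 * (3 * 1024) + 1/2 * (4 * 1024) = 3584
// PredCost <= UnpredCost, so if-conversion is reported as profitable.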
2201 
2202 unsigned
2203 ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2204  unsigned NumInsts) const {
2205  // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2206  // ARM has a condition code field in every predicable instruction; using it
2207  // doesn't change code size.
2208  if (!Subtarget.isThumb2())
2209  return 0;
2210 
2211  // It's possible that an IT block is restricted to a single instruction.
2212  unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2213  return divideCeil(NumInsts, MaxInsts) * 2;
2214 }
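// For instance (hypothetical counts): predicating 6 instructions needs
// divideCeil(6, 4) = 2 IT instructions, i.e. 4 extra bytes; with restrictIT
// (one instruction per IT block) it needs 6 ITs, i.e. 12 extra bytes.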
2215 
2216 unsigned
2217 ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2218  // If this branch is likely to be folded into the comparison to form a
2219  // CB(N)Z, then removing it won't reduce code size at all, because that will
2220  // just replace the CB(N)Z with a CMP.
2221  if (MI.getOpcode() == ARM::t2Bcc &&
2222  findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2223  return 0;
2224 
2225  unsigned Size = getInstSizeInBytes(MI);
2226 
2227  // For Thumb2, all branches are 32-bit instructions during the if conversion
2228  // pass, but may be replaced with 16-bit instructions during size reduction.
2229  // Since the branches considered by if conversion tend to be forward branches
2230  // over small basic blocks, they are very likely to be in range for the
2231  // narrow instructions, so we assume the final code size will be half what it
2232  // currently is.
2233  if (Subtarget.isThumb2())
2234  Size /= 2;
2235 
2236  return Size;
2237 }
2238 
2239 bool
2240 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2241  MachineBasicBlock &FMBB) const {
2242  // Reduce false anti-dependencies to let the target's out-of-order execution
2243  // engine do its thing.
2244  return Subtarget.isProfitableToUnpredicate();
2245 }
2246 
2247 /// getInstrPredicate - If instruction is predicated, returns its predicate
2248 /// condition, otherwise returns AL. It also returns the condition code
2249 /// register by reference.
2250 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2251  Register &PredReg) {
2252  int PIdx = MI.findFirstPredOperandIdx();
2253  if (PIdx == -1) {
2254  PredReg = 0;
2255  return ARMCC::AL;
2256  }
2257 
2258  PredReg = MI.getOperand(PIdx+1).getReg();
2259  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2260 }
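// Typical usage sketch (caller-side code, names are illustrative):
//   Register PredReg;
//   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
//   if (Pred != ARMCC::AL) {
//     // MI only executes when Pred holds; PredReg is normally $cpsr.
//   }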
2261 
2262 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2263  if (Opc == ARM::B)
2264  return ARM::Bcc;
2265  if (Opc == ARM::tB)
2266  return ARM::tBcc;
2267  if (Opc == ARM::t2B)
2268  return ARM::t2Bcc;
2269 
2270  llvm_unreachable("Unknown unconditional branch opcode!");
2271 }
2272 
2273 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2274  bool NewMI,
2275  unsigned OpIdx1,
2276  unsigned OpIdx2) const {
2277  switch (MI.getOpcode()) {
2278  case ARM::MOVCCr:
2279  case ARM::t2MOVCCr: {
2280  // MOVCC can be commuted by inverting the condition.
2281  Register PredReg;
2282  ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2283  // MOVCC AL can't be inverted. Shouldn't happen.
2284  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2285  return nullptr;
2286  MachineInstr *CommutedMI =
2287  TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2288  if (!CommutedMI)
2289  return nullptr;
2290  // After swapping the MOVCC operands, also invert the condition.
2291  CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2292  .setImm(ARMCC::getOppositeCondition(CC));
2293  return CommutedMI;
2294  }
2295  }
2296  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2297 }
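// Illustrative effect on a select (virtual registers are examples): commuting
//   %0 = MOVCCr %1, %2, 0 /* eq */, $cpsr
// swaps the two uses and inverts the condition, giving the equivalent
//   %0 = MOVCCr %2, %1, 1 /* ne */, $cpsr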
2298 
2299 /// Identify instructions that can be folded into a MOVCC instruction, and
2300 /// return the defining instruction.
2301 MachineInstr *
2302 ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2303  const TargetInstrInfo *TII) const {
2304  if (!Reg.isVirtual())
2305  return nullptr;
2306  if (!MRI.hasOneNonDBGUse(Reg))
2307  return nullptr;
2308  MachineInstr *MI = MRI.getVRegDef(Reg);
2309  if (!MI)
2310  return nullptr;
2311  // Check if MI can be predicated and folded into the MOVCC.
2312  if (!isPredicable(*MI))
2313  return nullptr;
2314  // Check if MI has any non-dead defs or physreg uses. This also detects
2315  // predicated instructions which will be reading CPSR.
2316  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
2317  const MachineOperand &MO = MI->getOperand(i);
2318  // Reject frame index operands; PEI can't handle the predicated pseudos.
2319  if (MO.isFI() || MO.isCPI() || MO.isJTI())
2320  return nullptr;
2321  if (!MO.isReg())
2322  continue;
2323  // MI can't have any tied operands, that would conflict with predication.
2324  if (MO.isTied())
2325  return nullptr;
2326  if (Register::isPhysicalRegister(MO.getReg()))
2327  return nullptr;
2328  if (MO.isDef() && !MO.isDead())
2329  return nullptr;
2330  }
2331  bool DontMoveAcrossStores = true;
2332  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2333  return nullptr;
2334  return MI;
2335 }
2336 
2337 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2338  SmallVectorImpl<MachineOperand> &Cond,
2339  unsigned &TrueOp, unsigned &FalseOp,
2340  bool &Optimizable) const {
2341  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2342  "Unknown select instruction");
2343  // MOVCC operands:
2344  // 0: Def.
2345  // 1: True use.
2346  // 2: False use.
2347  // 3: Condition code.
2348  // 4: CPSR use.
2349  TrueOp = 1;
2350  FalseOp = 2;
2351  Cond.push_back(MI.getOperand(3));
2352  Cond.push_back(MI.getOperand(4));
2353  // We can always fold a def.
2354  Optimizable = true;
2355  return false;
2356 }
2357 
2358 MachineInstr *
2359 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2360  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2361  bool PreferFalse) const {
2362  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2363  "Unknown select instruction");
2364  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2365  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2366  bool Invert = !DefMI;
2367  if (!DefMI)
2368  DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2369  if (!DefMI)
2370  return nullptr;
2371 
2372  // Find new register class to use.
2373  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2374  Register DestReg = MI.getOperand(0).getReg();
2375  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
2376  if (!MRI.constrainRegClass(DestReg, PreviousClass))
2377  return nullptr;
2378 
2379  // Create a new predicated version of DefMI.
2380  // Rfalse is the first use.
2381  MachineInstrBuilder NewMI =
2382  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2383 
2384  // Copy all the DefMI operands, excluding its (null) predicate.
2385  const MCInstrDesc &DefDesc = DefMI->getDesc();
2386  for (unsigned i = 1, e = DefDesc.getNumOperands();
2387  i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
2388  NewMI.add(DefMI->getOperand(i));
2389 
2390  unsigned CondCode = MI.getOperand(3).getImm();
2391  if (Invert)
2392  NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2393  else
2394  NewMI.addImm(CondCode);
2395  NewMI.add(MI.getOperand(4));
2396 
2397  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2398  if (NewMI->hasOptionalDef())
2399  NewMI.add(condCodeOp());
2400 
2401  // The output register value when the predicate is false is an implicit
2402  // register operand tied to the first def.
2403  // The tie makes the register allocator ensure the FalseReg is allocated the
2404  // same register as operand 0.
2405  FalseReg.setImplicit();
2406  NewMI.add(FalseReg);
2407  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2408 
2409  // Update SeenMIs set: register newly created MI and erase removed DefMI.
2410  SeenMIs.insert(NewMI);
2411  SeenMIs.erase(DefMI);
2412 
2413  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2414  // DefMI would be invalid when transferred inside the loop. Checking for a
2415  // loop is expensive, but at least remove kill flags if they are in different
2416  // BBs.
2417  if (DefMI->getParent() != MI.getParent())
2418  NewMI->clearKillInfo();
2419 
2420  // The caller will erase MI, but not DefMI.
2421  DefMI->eraseFromParent();
2422  return NewMI;
2423 }
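// Rough sketch of the folding (operand order simplified, registers and
// opcodes are examples): given
//   %1 = t2ADDri %2, 1, 14, $noreg, $noreg     ; unpredicated add
//   %0 = t2MOVCCr %3, %1, 0 /* eq */, $cpsr    ; select
// the add can be rewritten as a single predicated instruction,
//   %0 = t2ADDri %2, 1, 0 /* eq */, $cpsr, $noreg, implicit %3
// with the implicit %3 tied to %0 so both values end up in the same register.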
2424 
2425 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2426 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2427 /// def operand.
2428 ///
2429 /// This will go away once we can teach tblgen how to set the optional CPSR def
2430 /// operand itself.
2431 struct AddSubFlagsOpcodePair {
2432  uint16_t PseudoOpc;
2433  uint16_t MainOpc;
2434 };
2435 
2436 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2437  {ARM::ADDSri, ARM::ADDri},
2438  {ARM::ADDSrr, ARM::ADDrr},
2439  {ARM::ADDSrsi, ARM::ADDrsi},
2440  {ARM::ADDSrsr, ARM::ADDrsr},
2441 
2442  {ARM::SUBSri, ARM::SUBri},
2443  {ARM::SUBSrr, ARM::SUBrr},
2444  {ARM::SUBSrsi, ARM::SUBrsi},
2445  {ARM::SUBSrsr, ARM::SUBrsr},
2446 
2447  {ARM::RSBSri, ARM::RSBri},
2448  {ARM::RSBSrsi, ARM::RSBrsi},
2449  {ARM::RSBSrsr, ARM::RSBrsr},
2450 
2451  {ARM::tADDSi3, ARM::tADDi3},
2452  {ARM::tADDSi8, ARM::tADDi8},
2453  {ARM::tADDSrr, ARM::tADDrr},
2454  {ARM::tADCS, ARM::tADC},
2455 
2456  {ARM::tSUBSi3, ARM::tSUBi3},
2457  {ARM::tSUBSi8, ARM::tSUBi8},
2458  {ARM::tSUBSrr, ARM::tSUBrr},
2459  {ARM::tSBCS, ARM::tSBC},
2460  {ARM::tRSBS, ARM::tRSB},
2461  {ARM::tLSLSri, ARM::tLSLri},
2462 
2463  {ARM::t2ADDSri, ARM::t2ADDri},
2464  {ARM::t2ADDSrr, ARM::t2ADDrr},
2465  {ARM::t2ADDSrs, ARM::t2ADDrs},
2466 
2467  {ARM::t2SUBSri, ARM::t2SUBri},
2468  {ARM::t2SUBSrr, ARM::t2SUBrr},
2469  {ARM::t2SUBSrs, ARM::t2SUBrs},
2470 
2471  {ARM::t2RSBSri, ARM::t2RSBri},
2472  {ARM::t2RSBSrs, ARM::t2RSBrs},
2473 };
2474 
2475 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2476  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
2477  if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
2478  return AddSubFlagsOpcodeMap[i].MainOpc;
2479  return 0;
2480 }
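// e.g. (sketch): convertAddSubFlagsOpcode(ARM::t2SUBSri) returns ARM::t2SUBri,
// while an opcode that has no entry in the table above returns 0.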
2481 
2482 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2483  MachineBasicBlock::iterator &MBBI,
2484  const DebugLoc &dl, Register DestReg,
2485  Register BaseReg, int NumBytes,
2486  ARMCC::CondCodes Pred, Register PredReg,
2487  const ARMBaseInstrInfo &TII,
2488  unsigned MIFlags) {
2489  if (NumBytes == 0 && DestReg != BaseReg) {
2490  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2491  .addReg(BaseReg, RegState::Kill)
2492  .add(predOps(Pred, PredReg))
2493  .add(condCodeOp())
2494  .setMIFlags(MIFlags);
2495  return;
2496  }
2497 
2498  bool isSub = NumBytes < 0;
2499  if (isSub) NumBytes = -NumBytes;
2500 
2501  while (NumBytes) {
2502  unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2503  unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2504  assert(ThisVal && "Didn't extract field correctly");
2505 
2506  // We will handle these bits from offset, clear them.
2507  NumBytes &= ~ThisVal;
2508 
2509  assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2510 
2511  // Build the new ADD / SUB.
2512  unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2513  BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2514  .addReg(BaseReg, RegState::Kill)
2515  .addImm(ThisVal)
2516  .add(predOps(Pred, PredReg))
2517  .add(condCodeOp())
2518  .setMIFlags(MIFlags);
2519  BaseReg = DestReg;
2520  }
2521 }
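// Worked example (hypothetical values): NumBytes = 0x10004 is not a valid
// shifter-operand immediate, so the loop above emits two instructions,
// roughly:
//   add rDest, rBase, #4
//   add rDest, rDest, #65536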
2522 
2523 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2524  MachineFunction &MF, MachineInstr *MI,
2525  unsigned NumBytes) {
2526  // This optimisation potentially adds lots of load and store
2527  // micro-operations, so it is only really a benefit to code size.
2528  if (!Subtarget.hasMinSize())
2529  return false;
2530 
2531  // If only one register is pushed/popped, LLVM can use an LDR/STR
2532  // instead. We can't modify those so make sure we're dealing with an
2533  // instruction we understand.
2534  bool IsPop = isPopOpcode(MI->getOpcode());
2535  bool IsPush = isPushOpcode(MI->getOpcode());
2536  if (!IsPush && !IsPop)
2537  return false;
2538 
2539  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2540  MI->getOpcode() == ARM::VLDMDIA_UPD;
2541  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2542  MI->getOpcode() == ARM::tPOP ||
2543  MI->getOpcode() == ARM::tPOP_RET;
2544 
2545  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2546  MI->getOperand(1).getReg() == ARM::SP)) &&
2547  "trying to fold sp update into non-sp-updating push/pop");
2548 
2549  // The VFP push & pop act on D-registers, so we can only correctly fold in
2550  // an adjustment that is a multiple of 8 bytes. Similarly, GPRs are 4 bytes
2551  // each. Don't try if this is violated.
2552  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2553  return false;
2554 
2555  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2556  // pred) so the list starts at 4. Thumb1 starts after the predicate.
2557  int RegListIdx = IsT1PushPop ? 2 : 4;
2558 
2559  // Calculate the space we'll need in terms of registers.
2560  unsigned RegsNeeded;
2561  const TargetRegisterClass *RegClass;
2562  if (IsVFPPushPop) {
2563  RegsNeeded = NumBytes / 8;
2564  RegClass = &ARM::DPRRegClass;
2565  } else {
2566  RegsNeeded = NumBytes / 4;
2567  RegClass = &ARM::GPRRegClass;
2568  }
2569 
2570  // We're going to have to strip all list operands off before
2571  // re-adding them since the order matters, so save the existing ones
2572  // for later.
2573  SmallVector<MachineOperand, 10> RegList;
2574 
2575  // We're also going to need the first register transferred by this
2576  // instruction, which won't necessarily be the first register in the list.
2577  unsigned FirstRegEnc = -1;
2578 
2579  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2580  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2581  MachineOperand &MO = MI->getOperand(i);
2582  RegList.push_back(MO);
2583 
2584  if (MO.isReg() && !MO.isImplicit() &&
2585  TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2586  FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2587  }
2588 
2589  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2590 
2591  // Now try to find enough space in the reglist to allocate NumBytes.
2592  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2593  --CurRegEnc) {
2594  unsigned CurReg = RegClass->getRegister(CurRegEnc);
2595  if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2596  continue;
2597  if (!IsPop) {
2598  // Pushing any register is completely harmless; mark the register involved
2599  // as undef since we don't care about its value and must not restore it
2600  // during stack unwinding.
2601  RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2602  false, false, true));
2603  --RegsNeeded;
2604  continue;
2605  }
2606 
2607  // However, we can only pop an extra register if it's not live. For
2608  // registers live within the function we might clobber a return value
2609  // register; the other way a register can be live here is if it's
2610  // callee-saved.
2611  if (isCalleeSavedRegister(CurReg, CSRegs) ||
2612  MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2613  MachineBasicBlock::LQR_Dead) {
2614  // VFP pops don't allow holes in the register list, so any skip is fatal
2615  // for our transformation. GPR pops do, so we should just keep looking.
2616  if (IsVFPPushPop)
2617  return false;
2618  else
2619  continue;
2620  }
2621 
2622  // Mark the unimportant registers as <def,dead> in the POP.
2623  RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2624  true));
2625  --RegsNeeded;
2626  }
2627 
2628  if (RegsNeeded > 0)
2629  return false;
2630 
2631  // Finally we know we can profitably perform the optimisation so go
2632  // ahead: strip all existing registers off and add them back again
2633  // in the right order.
2634  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2635  MI->RemoveOperand(i);
2636 
2637  // Add the complete list back in.
2638  MachineInstrBuilder MIB(MF, &*MI);
2639  for (int i = RegList.size() - 1; i >= 0; --i)
2640  MIB.add(RegList[i]);
2641 
2642  return true;
2643 }
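// Illustrative example (registers are hypothetical): with minsize, a prologue
//   tPUSH {r4, r5, r7, lr}
//   sub   sp, sp, #8
// can be folded into a single
//   tPUSH {r2, r3, r4, r5, r7, lr}
// where r2 and r3 are pushed only to reserve the 8 bytes; they are marked
// undef since their values never need to be restored.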
2644 
2645 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2646  Register FrameReg, int &Offset,
2647  const ARMBaseInstrInfo &TII) {
2648  unsigned Opcode = MI.getOpcode();
2649  const MCInstrDesc &Desc = MI.getDesc();
2650  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2651  bool isSub = false;
2652 
2653  // Memory operands in inline assembly always use AddrMode2.
2654  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2655  AddrMode = ARMII::AddrMode2;
2656 
2657  if (Opcode == ARM::ADDri) {
2658  Offset += MI.getOperand(FrameRegIdx+1).getImm();
2659  if (Offset == 0) {
2660  // Turn it into a move.
2661  MI.setDesc(TII.get(ARM::MOVr));
2662  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2663  MI.RemoveOperand(FrameRegIdx+1);
2664  Offset = 0;
2665  return true;
2666  } else if (Offset < 0) {
2667  Offset = -Offset;
2668  isSub = true;
2669  MI.setDesc(TII.get(ARM::SUBri));
2670  }
2671 
2672  // Common case: small offset, fits into instruction.
2673  if (ARM_AM::getSOImmVal(Offset) != -1) {
2674  // Replace the FrameIndex with sp / fp
2675  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2676  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2677  Offset = 0;
2678  return true;
2679  }
2680 
2681  // Otherwise, pull as much of the immediate into this ADDri/SUBri
2682  // as possible.
2683  unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2684  unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2685 
2686  // We will handle these bits from offset, clear them.
2687  Offset &= ~ThisImmVal;
2688 
2689  // Get the properly encoded SOImmVal field.
2690  assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2691  "Bit extraction didn't work?");
2692  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2693  } else {
2694  unsigned ImmIdx = 0;
2695  int InstrOffs = 0;
2696  unsigned NumBits = 0;
2697  unsigned Scale = 1;
2698  switch (AddrMode) {
2699  case ARMII::AddrMode_i12:
2700  ImmIdx = FrameRegIdx + 1;
2701  InstrOffs = MI.getOperand(ImmIdx).getImm();
2702  NumBits = 12;
2703  break;
2704  case ARMII::AddrMode2:
2705  ImmIdx = FrameRegIdx+2;
2706  InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2707  if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2708  InstrOffs *= -1;
2709  NumBits = 12;
2710  break;
2711  case ARMII::AddrMode3:
2712  ImmIdx = FrameRegIdx+2;
2713  InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2714  if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2715  InstrOffs *= -1;
2716  NumBits = 8;
2717  break;
2718  case ARMII::AddrMode4:
2719  case ARMII::AddrMode6:
2720  // Can't fold any offset even if it's zero.
2721  return false;
2722  case ARMII::AddrMode5:
2723  ImmIdx = FrameRegIdx+1;
2724  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2725  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2726  InstrOffs *= -1;
2727  NumBits = 8;
2728  Scale = 4;
2729  break;
2730  case ARMII::AddrMode5FP16:
2731  ImmIdx = FrameRegIdx+1;
2732  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2733  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2734  InstrOffs *= -1;
2735  NumBits = 8;
2736  Scale = 2;
2737  break;
2738  case ARMII::AddrModeT2_i7:
2739  case ARMII::AddrModeT2_i7s2:
2740  case ARMII::AddrModeT2_i7s4:
2741  ImmIdx = FrameRegIdx+1;
2742  InstrOffs = MI.getOperand(ImmIdx).getImm();
2743  NumBits = 7;
2744  Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2745  AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2746  break;
2747  default:
2748  llvm_unreachable("Unsupported addressing mode!");
2749  }
2750 
2751  Offset += InstrOffs * Scale;
2752  assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2753  if (Offset < 0) {
2754  Offset = -Offset;
2755  isSub = true;
2756  }
2757 
2758  // Attempt to fold the address computation if the opcode has offset bits
2759  if (NumBits > 0) {
2760  // Common case: small offset, fits into instruction.
2761  MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2762  int ImmedOffset = Offset / Scale;
2763  unsigned Mask = (1 << NumBits) - 1;
2764  if ((unsigned)Offset <= Mask * Scale) {
2765  // Replace the FrameIndex with sp
2766  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2767  // FIXME: When addrmode2 goes away, this will simplify (like the
2768  // T2 version), as the LDR.i12 versions don't need the encoding
2769  // tricks for the offset value.
2770  if (isSub) {
2771  if (AddrMode == ARMII::AddrMode_i12)
2772  ImmedOffset = -ImmedOffset;
2773  else
2774  ImmedOffset |= 1 << NumBits;
2775  }
2776  ImmOp.ChangeToImmediate(ImmedOffset);
2777  Offset = 0;
2778  return true;
2779  }
2780 
2781  // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2782  ImmedOffset = ImmedOffset & Mask;
2783  if (isSub) {
2784  if (AddrMode == ARMII::AddrMode_i12)
2785  ImmedOffset = -ImmedOffset;
2786  else
2787  ImmedOffset |= 1 << NumBits;
2788  }
2789  ImmOp.ChangeToImmediate(ImmedOffset);
2790  Offset &= ~(Mask*Scale);
2791  }
2792  }
2793 
2794  Offset = (isSub) ? -Offset : Offset;
2795  return Offset == 0;
2796 }
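// e.g. (sketch, operands simplified): an LDRi12 of %stack.0 with FrameReg = sp
// and a combined offset of 64 fits the 12-bit immediate, so the operand pair
// is rewritten in place to the equivalent of "ldr r0, [sp, #64]" and the
// function returns true with Offset left at 0.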
2797 
2798 /// analyzeCompare - For a comparison instruction, return the source registers
2799 /// in SrcReg and SrcReg2 if having two register operands, and the value it
2800 /// compares against in CmpValue. Return true if the comparison instruction
2801 /// can be analyzed.
2802 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2803  Register &SrcReg2, int64_t &CmpMask,
2804  int64_t &CmpValue) const {
2805  switch (MI.getOpcode()) {
2806  default: break;
2807  case ARM::CMPri:
2808  case ARM::t2CMPri:
2809  case ARM::tCMPi8:
2810  SrcReg = MI.getOperand(0).getReg();
2811  SrcReg2 = 0;
2812  CmpMask = ~0;
2813  CmpValue = MI.getOperand(1).getImm();
2814  return true;
2815  case ARM::CMPrr:
2816  case ARM::t2CMPrr:
2817  case ARM::tCMPr:
2818  SrcReg = MI.getOperand(0).getReg();
2819  SrcReg2 = MI.getOperand(1).getReg();
2820  CmpMask = ~0;
2821  CmpValue = 0;
2822  return true;
2823  case ARM::TSTri:
2824  case ARM::t2TSTri:
2825  SrcReg = MI.getOperand(0).getReg();
2826  SrcReg2 = 0;
2827  CmpMask = MI.getOperand(1).getImm();
2828  CmpValue = 0;
2829  return true;
2830  }
2831 
2832  return false;
2833 }
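// e.g. (sketch): for "t2CMPri %1, 42, 14, $noreg" this sets SrcReg = %1,
// SrcReg2 = 0, CmpMask = ~0 and CmpValue = 42, and returns true.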
2834 
2835 /// isSuitableForMask - Identify a suitable 'and' instruction that
2836 /// operates on the given source register and applies the same mask
2837 /// as a 'tst' instruction. Provide a limited look-through for copies.
2838 /// When successful, MI will hold the found instruction.
2839 static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2840  int CmpMask, bool CommonUse) {
2841  switch (MI->getOpcode()) {
2842  case ARM::ANDri:
2843  case ARM::t2ANDri:
2844  if (CmpMask != MI->getOperand(2).getImm())
2845  return false;
2846  if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2847  return true;
2848  break;
2849  }
2850 
2851  return false;
2852 }
2853 
2854 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2855 /// the condition code if we modify the instructions such that flags are
2856 /// set by ADD(a,b,X).
2857 inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2858  switch (CC) {
2859  default: return ARMCC::AL;
2860  case ARMCC::HS: return ARMCC::LO;
2861  case ARMCC::LO: return ARMCC::HS;
2862  case ARMCC::VS: return ARMCC::VS;
2863  case ARMCC::VC: return ARMCC::VC;
2864  }
2865 }
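// Rationale sketch: if r0 = ADD(r1, X), then CMP(r0, r1) sets the carry flag
// exactly when the addition did not wrap, while ADDS(r1, X) sets carry exactly
// when it did wrap, so HS and LO must be swapped when the CMP is replaced by
// the flag-setting ADD; the overflow-based conditions map to themselves.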
2866 
2867 /// isRedundantFlagInstr - check whether the first instruction, whose only
2868 /// purpose is to update flags, can be made redundant.
2869 /// CMPrr can be made redundant by SUBrr if the operands are the same.
2870 /// CMPri can be made redundant by SUBri if the operands are the same.
2871 /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2872 /// This function can be extended later on.
2873 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2874  Register SrcReg, Register SrcReg2,
2875  int64_t ImmValue,
2876  const MachineInstr *OI,
2877  bool &IsThumb1) {
2878  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2879  (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2880  ((OI->getOperand(1).getReg() == SrcReg &&
2881  OI->getOperand(2).getReg() == SrcReg2) ||
2882  (OI->getOperand(1).getReg() == SrcReg2 &&
2883  OI->getOperand(2).getReg() == SrcReg))) {
2884  IsThumb1 = false;
2885  return true;
2886  }
2887 
2888  if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2889  ((OI->getOperand(2).getReg() == SrcReg &&
2890  OI->getOperand(3).getReg() == SrcReg2) ||
2891  (OI->getOperand(2).getReg() == SrcReg2 &&
2892  OI->getOperand(3).getReg() == SrcReg))) {
2893  IsThumb1 = true;
2894  return true;
2895  }
2896 
2897  if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2898  (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2899  OI->getOperand(1).getReg() == SrcReg &&
2900  OI->getOperand(2).getImm() == ImmValue) {
2901  IsThumb1 = false;
2902  return true;
2903  }
2904 
2905  if (CmpI->getOpcode() == ARM::tCMPi8 &&
2906  (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2907  OI->getOperand(2).getReg() == SrcReg &&
2908  OI->getOperand(3).getImm() == ImmValue) {
2909  IsThumb1 = true;
2910  return true;
2911  }
2912 
2913  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2914  (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2915  OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2916  OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2917  OI->getOperand(0).getReg() == SrcReg &&
2918  OI->getOperand(1).getReg() == SrcReg2) {
2919  IsThumb1 = false;
2920  return true;
2921  }
2922 
2923  if (CmpI->getOpcode() == ARM::tCMPr &&
2924  (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2925  OI->getOpcode() == ARM::tADDrr) &&
2926  OI->getOperand(0).getReg() == SrcReg &&
2927  OI->getOperand(2).getReg() == SrcReg2) {
2928  IsThumb1 = true;
2929  return true;
2930  }
2931 
2932  return false;
2933 }
2934 
2935 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2936  switch (MI->getOpcode()) {
2937  default: return false;
2938  case ARM::tLSLri:
2939  case ARM::tLSRri:
2940  case ARM::tLSLrr:
2941  case ARM::tLSRrr:
2942  case ARM::tSUBrr:
2943  case ARM::tADDrr:
2944  case ARM::tADDi3:
2945  case ARM::tADDi8:
2946  case ARM::tSUBi3:
2947  case ARM::tSUBi8:
2948  case ARM::tMUL:
2949  case ARM::tADC:
2950  case ARM::tSBC:
2951  case ARM::tRSB:
2952  case ARM::tAND:
2953  case ARM::tORR:
2954  case ARM::tEOR:
2955  case ARM::tBIC:
2956  case ARM::tMVN:
2957  case ARM::tASRri:
2958  case ARM::tASRrr:
2959  case ARM::tROR:
2960  IsThumb1 = true;
2961  LLVM_FALLTHROUGH;
2962  case ARM::RSBrr:
2963  case ARM::RSBri:
2964  case ARM::RSCrr:
2965  case ARM::RSCri:
2966  case ARM::ADDrr:
2967  case ARM::ADDri:
2968  case ARM::ADCrr:
2969  case ARM::ADCri:
2970  case ARM::SUBrr:
2971  case ARM::SUBri:
2972  case ARM::SBCrr:
2973  case ARM::SBCri:
2974  case ARM::t2RSBri:
2975  case ARM::t2ADDrr:
2976  case ARM::t2ADDri:
2977  case ARM::t2ADCrr:
2978  case ARM::t2ADCri:
2979  case ARM::t2SUBrr:
2980  case ARM::t2SUBri:
2981  case ARM::t2SBCrr:
2982  case ARM::t2SBCri:
2983  case ARM::ANDrr:
2984  case ARM::ANDri:
2985  case ARM::t2ANDrr:
2986  case ARM::t2ANDri:
2987  case ARM::ORRrr:
2988  case ARM::ORRri:
2989  case ARM::t2ORRrr:
2990  case ARM::t2ORRri:
2991  case ARM::EORrr:
2992  case ARM::EORri:
2993  case ARM::t2EORrr:
2994  case ARM::t2EORri:
2995  case ARM::t2LSRri:
2996  case ARM::t2LSRrr:
2997  case ARM::t2LSLri:
2998  case ARM::t2LSLrr:
2999  return true;
3000  }
3001 }
3002 
3003 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
3004 /// comparison into one that sets the zero bit in the flags register;
3005 /// Remove a redundant Compare instruction if an earlier instruction can set the
3006 /// flags in the same way as Compare.
3007 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
3008 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
3009 /// condition code of instructions which use the flags.
3010 bool ARMBaseInstrInfo::optimizeCompareInstr(
3011  MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
3012  int64_t CmpValue, const MachineRegisterInfo *MRI) const {
3013  // Get the unique definition of SrcReg.
3014  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3015  if (!MI) return false;
3016 
3017  // Masked compares sometimes use the same register as the corresponding 'and'.
3018  if (CmpMask != ~0) {
3019  if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
3020  MI = nullptr;
3021  for (MachineRegisterInfo::use_instr_iterator
3022  UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
3023  UI != UE; ++UI) {
3024  if (UI->getParent() != CmpInstr.getParent())
3025  continue;
3026  MachineInstr *PotentialAND = &*UI;
3027  if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
3028  isPredicated(*PotentialAND))
3029  continue;
3030  MI = PotentialAND;
3031  break;
3032  }
3033  if (!MI) return false;
3034  }
3035  }
3036 
3037  // Get ready to iterate backward from CmpInstr.
3038  MachineBasicBlock::iterator I = CmpInstr, E = MI,
3039  B = CmpInstr.getParent()->begin();
3040 
3041  // Early exit if CmpInstr is at the beginning of the BB.
3042  if (I == B) return false;
3043 
3044  // There are two possible candidates which can be changed to set CPSR:
3045  // One is MI, the other is a SUB or ADD instruction.
3046  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
3047  // ADDr[ri](r1, r2, X).
3048  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
3049  MachineInstr *SubAdd = nullptr;
3050  if (SrcReg2 != 0)
3051  // MI is not a candidate for CMPrr.
3052  MI = nullptr;
3053  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
3054  // Conservatively refuse to convert an instruction which isn't in the same
3055  // BB as the comparison.
3056  // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
3057  // Thus we cannot return here.
3058  if (CmpInstr.getOpcode() == ARM::CMPri ||
3059  CmpInstr.getOpcode() == ARM::t2CMPri ||
3060  CmpInstr.getOpcode() == ARM::tCMPi8)
3061  MI = nullptr;
3062  else
3063  return false;
3064  }
3065 
3066  bool IsThumb1 = false;
3067  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
3068  return false;
3069 
3070  // We also want to do this peephole for cases like this: if (a*b == 0),
3071  // and optimise away the CMP instruction from the generated code sequence:
3072  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
3073  // resulting from the select instruction, but these MOVS instructions for
3074  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
3075  // However, if we only have MOVS instructions in between the CMP and the
3076  // other instruction (the MULS in this example), then the CPSR is dead so we
3077  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
3078  // reordering and then continue the analysis hoping we can eliminate the
3079  // CMP. This peephole works on the vregs, so is still in SSA form. As a
3080  // consequence, the movs won't redefine/kill the MUL operands which would
3081  // make this reordering illegal.
3082  const TargetRegisterInfo *TRI = &getRegisterInfo();
3083  if (MI && IsThumb1) {
3084  --I;
3085  if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
3086  bool CanReorder = true;
3087  for (; I != E; --I) {
3088  if (I->getOpcode() != ARM::tMOVi8) {
3089  CanReorder = false;
3090  break;
3091  }
3092  }
3093  if (CanReorder) {
3094  MI = MI->removeFromParent();
3095  E = CmpInstr;
3096  CmpInstr.getParent()->insert(E, MI);
3097  }
3098  }
3099  I = CmpInstr;
3100  E = MI;
3101  }
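// Illustrative sketch of the reordering above (Thumb1, virtual registers are
// examples):
//   %4 = tMUL %1, %2, ...          ; candidate flag-setter
//   %5, dead $cpsr = tMOVi8 1, ... ; movs feeding a select, CPSR result unused
//   %6, dead $cpsr = tMOVi8 0, ...
//   tCMPi8 %4, 0, ...              ; compare we hope to remove
// is reordered so the tMUL sits directly in front of the tCMPi8, after which
// the analysis below can let MULS set the flags and erase the CMP.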
3102 
3103  // Check that CPSR isn't set between the comparison instruction and the one we
3104  // want to change. At the same time, search for SubAdd.
3105  bool SubAddIsThumb1 = false;
3106  do {
3107  const MachineInstr &Instr = *--I;
3108 
3109  // Check whether CmpInstr can be made redundant by the current instruction.
3110  if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
3111  SubAddIsThumb1)) {
3112  SubAdd = &*I;
3113  break;
3114  }
3115 
3116  // Allow E (which was initially MI) to be SubAdd but do not search before E.
3117  if (I == E)
3118  break;
3119 
3120  if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
3121  Instr.readsRegister(ARM::CPSR, TRI))
3122  // This instruction modifies or uses CPSR after the one we want to
3123  // change. We can't do this transformation.
3124  return false;
3125 
3126  if (I == B) {
3127  // In some cases, we scan the use-list of an instruction for an AND;
3128  // that AND is in the same BB, but may not be scheduled before the
3129  // corresponding TST. In that case, bail out.
3130  //
3131  // FIXME: We could try to reschedule the AND.
3132  return false;
3133  }
3134  } while (true);
3135 
3136  // Return false if no candidates exist.
3137  if (!MI && !SubAdd)
3138  return false;
3139 
3140  // If we found a SubAdd, use it as it will be closer to the CMP
3141  if (SubAdd) {
3142  MI = SubAdd;
3143  IsThumb1 = SubAddIsThumb1;
3144  }
3145 
3146  // We can't use a predicated instruction - it doesn't always write the flags.
3147  if (isPredicated(*MI))
3148  return false;
3149 
3150  // Scan forward for the use of CPSR
3151  // When checking against MI: if it's a conditional code that requires
3152  // checking of the V bit or C bit, then this is not safe to do.
3153  // It is safe to remove CmpInstr if CPSR is redefined or killed.
3154  // If we are done with the basic block, we need to check whether CPSR is
3155  // live-out.
3156  SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3157  OperandsToUpdate;
3158  bool isSafe = false;
3159  I = CmpInstr;
3160  E = CmpInstr.getParent()->end();
3161  while (!isSafe && ++I != E) {
3162  const MachineInstr &Instr = *I;
3163  for (unsigned IO = 0, EO = Instr.getNumOperands();
3164  !isSafe && IO != EO; ++IO) {
3165  const MachineOperand &MO = Instr.getOperand(IO);
3166  if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3167  isSafe = true;
3168  break;
3169  }
3170  if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3171  continue;
3172  if (MO.isDef()) {
3173  isSafe = true;
3174  break;
3175  }
3176  // Condition code is after the operand before CPSR except for VSELs.
3177  ARMCC::CondCodes CC;
3178  bool IsInstrVSel = true;
3179  switch (Instr.getOpcode()) {
3180  default:
3181  IsInstrVSel = false;
3182  CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3183  break;
3184  case ARM::VSELEQD:
3185  case ARM::VSELEQS:
3186  case ARM::VSELEQH:
3187  CC = ARMCC::EQ;
3188  break;
3189  case ARM::VSELGTD:
3190  case ARM::VSELGTS:
3191  case ARM::VSELGTH:
3192  CC = ARMCC::GT;
3193  break;
3194  case ARM::VSELGED:
3195  case ARM::VSELGES:
3196  case ARM::VSELGEH:
3197  CC = ARMCC::GE;
3198  break;
3199  case ARM::VSELVSD:
3200  case ARM::VSELVSS:
3201  case ARM::VSELVSH:
3202  CC = ARMCC::VS;
3203  break;
3204  }
3205 
3206  if (SubAdd) {
3207  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3208  // on CMP needs to be updated to be based on SUB.
3209  // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3210  // needs to be modified.
3211  // Push the condition code operands to OperandsToUpdate.
3212  // If it is safe to remove CmpInstr, the condition code of these
3213  // operands will be modified.
3214  unsigned Opc = SubAdd->getOpcode();
3215  bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3216  Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3217  Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3218  Opc == ARM::tSUBi8;
3219  unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3220  if (!IsSub ||
3221  (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3222  SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3223  // VSel doesn't support condition code update.
3224  if (IsInstrVSel)
3225  return false;
3226  // Ensure we can swap the condition.
3227  ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3228  if (NewCC == ARMCC::AL)
3229  return false;
3230  OperandsToUpdate.push_back(
3231  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3232  }
3233  } else {
3234  // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3235  switch (CC) {
3236  case ARMCC::EQ: // Z
3237  case ARMCC::NE: // Z
3238  case ARMCC::MI: // N
3239  case ARMCC::PL: // N
3240  case ARMCC::AL: // none
3241  // CPSR can be used multiple times, we should continue.
3242  break;
3243  case ARMCC::HS: // C
3244  case ARMCC::LO: // C
3245  case ARMCC::VS: // V
3246  case ARMCC::VC: // V
3247  case ARMCC::HI: // C Z
3248  case ARMCC::LS: // C Z
3249  case ARMCC::GE: // N V
3250  case ARMCC::LT: // N V
3251  case ARMCC::GT: // Z N V
3252  case ARMCC::LE: // Z N V
3253  // The instruction uses the V bit or C bit which is not safe.
3254  return false;
3255  }
3256  }
3257  }
3258  }
3259 
3260  // If CPSR is not killed nor re-defined, we should check whether it is
3261  // live-out. If it is live-out, do not optimize.
3262  if (!isSafe) {
3263  MachineBasicBlock *MBB = CmpInstr.getParent();
3264  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
3265  SE = MBB->succ_end(); SI != SE; ++SI)
3266  if ((*SI)->isLiveIn(ARM::CPSR))
3267  return false;
3268  }
3269 
3270  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3271  // set CPSR so this is represented as an explicit output)
3272  if (!IsThumb1) {
3273  MI->getOperand(5).setReg(ARM::CPSR);
3274  MI->getOperand(5).setIsDef(true);
3275  }
3276  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3277  CmpInstr.eraseFromParent();
3278 
3279  // Modify the condition code of operands in OperandsToUpdate.
3280  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3281  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3282  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
3283  OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3284 
3285  MI->clearRegisterDeads(ARM::CPSR);
3286 
3287  return true;
3288 }
3289 
3290 bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3291  // Do not sink MI if it might be used to optimize a redundant compare.
3292  // We heuristically only look at the instruction immediately following MI to
3293  // avoid potentially searching the entire basic block.
3294  if (isPredicated(MI))
3295  return true;
3296  MachineBasicBlock::const_iterator Next = &MI;
3297  ++Next;
3298  Register SrcReg, SrcReg2;
3299  int64_t CmpMask, CmpValue;
3300  bool IsThumb1;
3301  if (Next != MI.getParent()->end() &&
3302  analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3303  isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3304  return false;
3305  return true;
3306 }
3307 
3308 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3309  Register Reg,
3310  MachineRegisterInfo *MRI) const {
3311  // Fold large immediates into add, sub, or, xor.
3312  unsigned DefOpc = DefMI.getOpcode();
3313  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
3314  return false;
3315  if (!DefMI.getOperand(1).isImm())
3316  // Could be t2MOVi32imm @xx
3317  return false;
3318 
3319  if (!MRI->hasOneNonDBGUse(Reg))
3320  return false;
3321 
3322  const MCInstrDesc &DefMCID = DefMI.getDesc();
3323  if (DefMCID.hasOptionalDef()) {
3324  unsigned NumOps = DefMCID.getNumOperands();
3325  const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3326  if (MO.getReg() == ARM::CPSR && !MO.isDead())
3327  // If DefMI defines CPSR and it is not dead, it's obviously not safe
3328  // to delete DefMI.
3329  return false;
3330  }
3331 
3332  const MCInstrDesc &UseMCID = UseMI.getDesc();
3333  if (UseMCID.hasOptionalDef()) {
3334  unsigned NumOps = UseMCID.getNumOperands();
3335  if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3336  // If the instruction sets the flag, do not attempt this optimization
3337  // since it may change the semantics of the code.
3338  return false;
3339  }
3340 
3341  unsigned UseOpc = UseMI.getOpcode();
3342  unsigned NewUseOpc = 0;
3343  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3344  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3345  bool Commute = false;
3346  switch (UseOpc) {
3347  default: return false;
3348  case ARM::SUBrr:
3349  case ARM::ADDrr:
3350  case ARM::ORRrr:
3351  case ARM::EORrr:
3352  case ARM::t2SUBrr:
3353  case ARM::t2ADDrr:
3354  case ARM::t2ORRrr:
3355  case ARM::t2EORrr: {
3356  Commute = UseMI.getOperand(2).getReg() != Reg;
3357  switch (UseOpc) {
3358  default: break;
3359  case ARM::ADDrr:
3360  case ARM::SUBrr:
3361  if (UseOpc == ARM::SUBrr && Commute)
3362  return false;
3363 
3364  // ADD/SUB are special because they're essentially the same operation, so
3365  // we can handle a larger range of immediates.
3366  if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3367  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3368  else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3369  ImmVal = -ImmVal;
3370  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3371  } else
3372  return false;
3373  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3374  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3375  break;
3376  case ARM::ORRrr:
3377  case ARM::EORrr:
3378  if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3379  return false;
3380  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3381  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3382  switch (UseOpc) {
3383  default: break;
3384  case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3385  case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3386  }
3387  break;
3388  case ARM::t2ADDrr:
3389  case ARM::t2SUBrr: {
3390  if (UseOpc == ARM::t2SUBrr && Commute)
3391  return false;
3392 
3393  // ADD/SUB are special because they're essentially the same operation, so
3394  // we can handle a larger range of immediates.
3395  const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3396  const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3397  const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3398  if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3399  NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3400  else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3401  ImmVal = -ImmVal;
3402  NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3403  } else
3404  return false;
3405  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3406  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3407  break;
3408  }
3409  case ARM::t2ORRrr:
3410  case ARM::t2EORrr:
3411  if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3412  return false;
3413  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3414  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3415  switch (UseOpc) {
3416  default: break;
3417  case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3418  case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3419  }
3420  break;
3421  }
3422  }
3423  }
3424 
3425  unsigned OpIdx = Commute ? 2 : 1;
3426  Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3427  bool isKill = UseMI.getOperand(OpIdx).isKill();
3428  const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3429  Register NewReg = MRI->createVirtualRegister(TRC);
3430  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3431  NewReg)
3432  .addReg(Reg1, getKillRegState(isKill))
3433  .addImm(SOImmValV1)
3434  .add(predOps(ARMCC::AL))
3435  .add(condCodeOp());
3436  UseMI.setDesc(get(NewUseOpc));
3437  UseMI.getOperand(1).setReg(NewReg);
3438  UseMI.getOperand(1).setIsKill();
3439  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3440  DefMI.eraseFromParent();
3441  // FIXME: t2ADDrr should be split, as different rules apply when writing to SP,
3442  // just as t2ADDri was split into [t2ADDri, t2ADDspImm].
3443  // Then the below code will not be needed, as the input/output register
3444  // classes will be rgpr or gprSP.
3445  // For now, we fix the UseMI operand explicitly here:
3446  switch(NewUseOpc){
3447  case ARM::t2ADDspImm:
3448  case ARM::t2SUBspImm:
3449  case ARM::t2ADDri:
3450  case ARM::t2SUBri:
3451  MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3452  }
3453  return true;
3454 }
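// Editorial illustration (not part of the original source): a sketch of the
// rewrite FoldImmediate performs above, assuming the 32-bit constant splits
// into two SO-immediates. Virtual register numbers are made up.
//
//   %1 = MOVi32imm 0x00ff00ff
//   %0 = ADDrr %2, %1, ...
//
// becomes (0x00ff00ff = 0x00ff0000 + 0x000000ff, both encodable SO-immediates):
//
//   %3 = ADDri %2, 0x00ff0000, ...
//   %0 = ADDri %3, 0x000000ff, ...
//
// and the now-unused MOVi32imm is erased from its parent block.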
3455 
3456 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3457  const MachineInstr &MI) {
3458  switch (MI.getOpcode()) {
3459  default: {
3460  const MCInstrDesc &Desc = MI.getDesc();
3461  int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3462  assert(UOps >= 0 && "bad # UOps");
3463  return UOps;
3464  }
3465 
3466  case ARM::LDRrs:
3467  case ARM::LDRBrs:
3468  case ARM::STRrs:
3469  case ARM::STRBrs: {
3470  unsigned ShOpVal = MI.getOperand(3).getImm();
3471  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3472  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3473  if (!isSub &&
3474  (ShImm == 0 ||
3475  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3476  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3477  return 1;
3478  return 2;
3479  }
3480 
3481  case ARM::LDRH:
3482  case ARM::STRH: {
3483  if (!MI.getOperand(2).getReg())
3484  return 1;
3485 
3486  unsigned ShOpVal = MI.getOperand(3).getImm();
3487  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3488  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3489  if (!isSub &&
3490  (ShImm == 0 ||
3491  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3492  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3493  return 1;
3494  return 2;
3495  }
3496 
3497  case ARM::LDRSB:
3498  case ARM::LDRSH:
3499  return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3500 
3501  case ARM::LDRSB_POST:
3502  case ARM::LDRSH_POST: {
3503  Register Rt = MI.getOperand(0).getReg();
3504  Register Rm = MI.getOperand(3).getReg();
3505  return (Rt == Rm) ? 4 : 3;
3506  }
3507 
3508  case ARM::LDR_PRE_REG:
3509  case ARM::LDRB_PRE_REG: {
3510  Register Rt = MI.getOperand(0).getReg();
3511  Register Rm = MI.getOperand(3).getReg();
3512  if (Rt == Rm)
3513  return 3;
3514  unsigned ShOpVal = MI.getOperand(4).getImm();
3515  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3516  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3517  if (!isSub &&
3518  (ShImm == 0 ||
3519  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3520  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3521  return 2;
3522  return 3;
3523  }
3524 
3525  case ARM::STR_PRE_REG:
3526  case ARM::STRB_PRE_REG: {
3527  unsigned ShOpVal = MI.getOperand(4).getImm();
3528  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3529  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3530  if (!isSub &&
3531  (ShImm == 0 ||
3532  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3533  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3534  return 2;
3535  return 3;
3536  }
3537 
3538  case ARM::LDRH_PRE:
3539  case ARM::STRH_PRE: {
3540  Register Rt = MI.getOperand(0).getReg();
3541  Register Rm = MI.getOperand(3).getReg();
3542  if (!Rm)
3543  return 2;
3544  if (Rt == Rm)
3545  return 3;
3546  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3547  }
3548 
3549  case ARM::LDR_POST_REG:
3550  case ARM::LDRB_POST_REG:
3551  case ARM::LDRH_POST: {
3552  Register Rt = MI.getOperand(0).getReg();
3553  Register Rm = MI.getOperand(3).getReg();
3554  return (Rt == Rm) ? 3 : 2;
3555  }
3556 
3557  case ARM::LDR_PRE_IMM:
3558  case ARM::LDRB_PRE_IMM:
3559  case ARM::LDR_POST_IMM:
3560  case ARM::LDRB_POST_IMM:
3561  case ARM::STRB_POST_IMM:
3562  case ARM::STRB_POST_REG:
3563  case ARM::STRB_PRE_IMM:
3564  case ARM::STRH_POST:
3565  case ARM::STR_POST_IMM:
3566  case ARM::STR_POST_REG:
3567  case ARM::STR_PRE_IMM:
3568  return 2;
3569 
3570  case ARM::LDRSB_PRE:
3571  case ARM::LDRSH_PRE: {
3572  Register Rm = MI.getOperand(3).getReg();
3573  if (Rm == 0)
3574  return 3;
3575  Register Rt = MI.getOperand(0).getReg();
3576  if (Rt == Rm)
3577  return 4;
3578  unsigned ShOpVal = MI.getOperand(4).getImm();
3579  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3580  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3581  if (!isSub &&
3582  (ShImm == 0 ||
3583  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3584  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3585  return 3;
3586  return 4;
3587  }
3588 
3589  case ARM::LDRD: {
3590  Register Rt = MI.getOperand(0).getReg();
3591  Register Rn = MI.getOperand(2).getReg();
3592  Register Rm = MI.getOperand(3).getReg();
3593  if (Rm)
3594  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3595  : 3;
3596  return (Rt == Rn) ? 3 : 2;
3597  }
3598 
3599  case ARM::STRD: {
3600  Register Rm = MI.getOperand(3).getReg();
3601  if (Rm)
3602  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3603  : 3;
3604  return 2;
3605  }
3606 
3607  case ARM::LDRD_POST:
3608  case ARM::t2LDRD_POST:
3609  return 3;
3610 
3611  case ARM::STRD_POST:
3612  case ARM::t2STRD_POST:
3613  return 4;
3614 
3615  case ARM::LDRD_PRE: {
3616  Register Rt = MI.getOperand(0).getReg();
3617  Register Rn = MI.getOperand(3).getReg();
3618  Register Rm = MI.getOperand(4).getReg();
3619  if (Rm)
3620  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3621  : 4;
3622  return (Rt == Rn) ? 4 : 3;
3623  }
3624 
3625  case ARM::t2LDRD_PRE: {
3626  Register Rt = MI.getOperand(0).getReg();
3627  Register Rn = MI.getOperand(3).getReg();
3628  return (Rt == Rn) ? 4 : 3;
3629  }
3630 
3631  case ARM::STRD_PRE: {
3632  Register Rm = MI.getOperand(4).getReg();
3633  if (Rm)
3634  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3635  : 4;
3636  return 3;
3637  }
3638 
3639  case ARM::t2STRD_PRE:
3640  return 3;
3641 
3642  case ARM::t2LDR_POST:
3643  case ARM::t2LDRB_POST:
3644  case ARM::t2LDRB_PRE:
3645  case ARM::t2LDRSBi12:
3646  case ARM::t2LDRSBi8:
3647  case ARM::t2LDRSBpci:
3648  case ARM::t2LDRSBs:
3649  case ARM::t2LDRH_POST:
3650  case ARM::t2LDRH_PRE:
3651  case ARM::t2LDRSBT:
3652  case ARM::t2LDRSB_POST:
3653  case ARM::t2LDRSB_PRE:
3654  case ARM::t2LDRSH_POST:
3655  case ARM::t2LDRSH_PRE:
3656  case ARM::t2LDRSHi12:
3657  case ARM::t2LDRSHi8:
3658  case ARM::t2LDRSHpci:
3659  case ARM::t2LDRSHs:
3660  return 2;
3661 
3662  case ARM::t2LDRDi8: {
3663  Register Rt = MI.getOperand(0).getReg();
3664  Register Rn = MI.getOperand(2).getReg();
3665  return (Rt == Rn) ? 3 : 2;
3666  }
3667 
3668  case ARM::t2STRB_POST:
3669  case ARM::t2STRB_PRE:
3670  case ARM::t2STRBs:
3671  case ARM::t2STRDi8:
3672  case ARM::t2STRH_POST:
3673  case ARM::t2STRH_PRE:
3674  case ARM::t2STRHs:
3675  case ARM::t2STR_POST:
3676  case ARM::t2STR_PRE:
3677  case ARM::t2STRs:
3678  return 2;
3679  }
3680 }
3681 
3682 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
3683 // can't be easily determined, return 0 (missing MachineMemOperand).
3684 //
3685 // FIXME: The current MachineInstr design does not support relying on machine
3686 // mem operands to determine the width of a memory access. Instead, we expect
3687 // the target to provide this information based on the instruction opcode and
3688 // operands. However, using MachineMemOperand is the best solution now for
3689 // two reasons:
3690 //
3691 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3692 // operands. This is much more dangerous than using the MachineMemOperand
3693 // sizes because CodeGen passes can insert/remove optional machine operands. In
3694 // fact, it's totally incorrect for preRA passes and appears to be wrong for
3695 // postRA passes as well.
3696 //
3697 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3698 // machine model that calls this should handle the unknown (zero size) case.
3699 //
3700 // Long term, we should require a target hook that verifies MachineMemOperand
3701 // sizes during MC lowering. That target hook should be local to MC lowering
3702 // because we can't ensure that it is aware of other MI forms. Doing this will
3703 // ensure that MachineMemOperands are correctly propagated through all passes.
3704 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3705  unsigned Size = 0;
3706  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3707  E = MI.memoperands_end();
3708  I != E; ++I) {
3709  Size += (*I)->getSize();
3710  }
3711  // FIXME: The scheduler currently can't handle values larger than 16. But
3712  // the values can actually go up to 32 for floating-point load/store
3713  // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3714  // operations isn't right; we could end up with "extra" memory operands for
3715  // various reasons, like tail merge merging two memory operations.
3716  return std::min(Size / 4, 16U);
3717 }
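// Editorial example: an LDMIA that loads four GPRs usually carries one 16-byte
// MachineMemOperand, so this returns 16 / 4 = 4 addresses; an LDM with no
// memoperands (or one whose size cannot be determined) returns 0 and the
// scheduling model must treat the width as unknown.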
3718 
3719 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3720  unsigned NumRegs) {
3721  unsigned UOps = 1 + NumRegs; // 1 for address computation.
3722  switch (Opc) {
3723  default:
3724  break;
3725  case ARM::VLDMDIA_UPD:
3726  case ARM::VLDMDDB_UPD:
3727  case ARM::VLDMSIA_UPD:
3728  case ARM::VLDMSDB_UPD:
3729  case ARM::VSTMDIA_UPD:
3730  case ARM::VSTMDDB_UPD:
3731  case ARM::VSTMSIA_UPD:
3732  case ARM::VSTMSDB_UPD:
3733  case ARM::LDMIA_UPD:
3734  case ARM::LDMDA_UPD:
3735  case ARM::LDMDB_UPD:
3736  case ARM::LDMIB_UPD:
3737  case ARM::STMIA_UPD:
3738  case ARM::STMDA_UPD:
3739  case ARM::STMDB_UPD:
3740  case ARM::STMIB_UPD:
3741  case ARM::tLDMIA_UPD:
3742  case ARM::tSTMIA_UPD:
3743  case ARM::t2LDMIA_UPD:
3744  case ARM::t2LDMDB_UPD:
3745  case ARM::t2STMIA_UPD:
3746  case ARM::t2STMDB_UPD:
3747  ++UOps; // One for base register writeback.
3748  break;
3749  case ARM::LDMIA_RET:
3750  case ARM::tPOP_RET:
3751  case ARM::t2LDMIA_RET:
3752  UOps += 2; // One for base reg wb, one for write to pc.
3753  break;
3754  }
3755  return UOps;
3756 }
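// Editorial example: LDMIA_UPD loading 4 registers costs 1 (address) + 4
// (registers) + 1 (base writeback) = 6 uops here, while LDMIA_RET with the
// same list costs 1 + 4 + 2 (writeback plus the write to pc) = 7 uops.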
3757 
3758 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3759  const MachineInstr &MI) const {
3760  if (!ItinData || ItinData->isEmpty())
3761  return 1;
3762 
3763  const MCInstrDesc &Desc = MI.getDesc();
3764  unsigned Class = Desc.getSchedClass();
3765  int ItinUOps = ItinData->getNumMicroOps(Class);
3766  if (ItinUOps >= 0) {
3767  if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3768  return getNumMicroOpsSwiftLdSt(ItinData, MI);
3769 
3770  return ItinUOps;
3771  }
3772 
3773  unsigned Opc = MI.getOpcode();
3774  switch (Opc) {
3775  default:
3776  llvm_unreachable("Unexpected multi-uops instruction!");
3777  case ARM::VLDMQIA:
3778  case ARM::VSTMQIA:
3779  return 2;
3780 
3781  // The number of uOps for a load / store multiple is determined by the number
3782  // of registers.
3783  //
3784  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3785  // same cycle. The scheduling for the first load / store must be done
3786  // separately by assuming the address is not 64-bit aligned.
3787  //
3788  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3789  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3790  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3791  case ARM::VLDMDIA:
3792  case ARM::VLDMDIA_UPD:
3793  case ARM::VLDMDDB_UPD:
3794  case ARM::VLDMSIA:
3795  case ARM::VLDMSIA_UPD:
3796  case ARM::VLDMSDB_UPD:
3797  case ARM::VSTMDIA:
3798  case ARM::VSTMDIA_UPD:
3799  case ARM::VSTMDDB_UPD:
3800  case ARM::VSTMSIA:
3801  case ARM::VSTMSIA_UPD:
3802  case ARM::VSTMSDB_UPD: {
3803  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3804  return (NumRegs / 2) + (NumRegs % 2) + 1;
3805  }
3806 
3807  case ARM::LDMIA_RET:
3808  case ARM::LDMIA:
3809  case ARM::LDMDA:
3810  case ARM::LDMDB:
3811  case ARM::LDMIB:
3812  case ARM::LDMIA_UPD:
3813  case ARM::LDMDA_UPD:
3814  case ARM::LDMDB_UPD:
3815  case ARM::LDMIB_UPD:
3816  case ARM::STMIA:
3817  case ARM::STMDA:
3818  case ARM::STMDB:
3819  case ARM::STMIB:
3820  case ARM::STMIA_UPD:
3821  case ARM::STMDA_UPD:
3822  case ARM::STMDB_UPD:
3823  case ARM::STMIB_UPD:
3824  case ARM::tLDMIA:
3825  case ARM::tLDMIA_UPD:
3826  case ARM::tSTMIA_UPD:
3827  case ARM::tPOP_RET:
3828  case ARM::tPOP:
3829  case ARM::tPUSH:
3830  case ARM::t2LDMIA_RET:
3831  case ARM::t2LDMIA:
3832  case ARM::t2LDMDB:
3833  case ARM::t2LDMIA_UPD:
3834  case ARM::t2LDMDB_UPD:
3835  case ARM::t2STMIA:
3836  case ARM::t2STMDB:
3837  case ARM::t2STMIA_UPD:
3838  case ARM::t2STMDB_UPD: {
3839  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3840  switch (Subtarget.getLdStMultipleTiming()) {
3841  case ARMSubtarget::SingleIssuePlusExtras:
3842  return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3843  case ARMSubtarget::SingleIssue:
3844  // Assume the worst.
3845  return NumRegs;
3846  case ARMSubtarget::DoubleIssue: {
3847  if (NumRegs < 4)
3848  return 2;
3849  // 4 registers would be issued: 2, 2.
3850  // 5 registers would be issued: 2, 2, 1.
3851  unsigned UOps = (NumRegs / 2);
3852  if (NumRegs % 2)
3853  ++UOps;
3854  return UOps;
3855  }
3856  case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3857  unsigned UOps = (NumRegs / 2);
3858  // If there is an odd number of registers or if it's not 64-bit aligned,
3859  // then it takes an extra AGU (Address Generation Unit) cycle.
3860  if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3861  (*MI.memoperands_begin())->getAlign() < Align(8))
3862  ++UOps;
3863  return UOps;
3864  }
3865  }
3866  }
3867  }
3868  llvm_unreachable("Didn't find the number of microops");
3869 }
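// Editorial sketch (not part of the original source): the Cortex-A9 load/store
// multiple formulas quoted in the comments above, written as a tiny standalone
// helper. The function name is hypothetical and exists only for illustration.
static inline unsigned exampleA9LdStMultipleUOps(unsigned NumRegs,
                                                 bool IsVFPOrNEON,
                                                 bool Aligned64) {
  if (IsVFPOrNEON)
    return NumRegs / 2 + NumRegs % 2 + 1; // (#reg / 2) + (#reg % 2) + 1
  unsigned UOps = NumRegs / 2;            // one uop per register pair
  if ((NumRegs % 2) || !Aligned64)
    ++UOps;                               // odd count or unaligned base: extra AGU uop
  return UOps;
}
// e.g. exampleA9LdStMultipleUOps(5, /*IsVFPOrNEON=*/true,  /*Aligned64=*/true) == 4
//      exampleA9LdStMultipleUOps(4, /*IsVFPOrNEON=*/false, /*Aligned64=*/true) == 2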
3870 
3871 int
3872 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3873  const MCInstrDesc &DefMCID,
3874  unsigned DefClass,
3875  unsigned DefIdx, unsigned DefAlign) const {
3876  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3877  if (RegNo <= 0)
3878  // Def is the address writeback.
3879  return ItinData->getOperandCycle(DefClass, DefIdx);
3880 
3881  int DefCycle;
3882  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3883  // (regno / 2) + (regno % 2) + 1
3884  DefCycle = RegNo / 2 + 1;
3885  if (RegNo % 2)
3886  ++DefCycle;
3887  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3888  DefCycle = RegNo;
3889  bool isSLoad = false;
3890 
3891  switch (DefMCID.getOpcode()) {
3892  default: break;
3893  case ARM::VLDMSIA:
3894  case ARM::VLDMSIA_UPD:
3895  case ARM::VLDMSDB_UPD:
3896  isSLoad = true;
3897  break;
3898  }
3899 
3900  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3901  // then it takes an extra cycle.
3902  if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3903  ++DefCycle;
3904  } else {
3905  // Assume the worst.
3906  DefCycle = RegNo + 2;
3907  }
3908 
3909  return DefCycle;
3910 }
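// Editorial worked example: for the third register loaded by a VLDM on
// Cortex-A8/A7, RegNo = 3, so DefCycle = 3 / 2 + 1 = 2, plus one more for the
// odd register count, giving 3. On A9-like cores and Swift, DefCycle = RegNo,
// plus one if the 'S'-register count is odd or the access is not 8-byte
// aligned.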
3911 
3912 int
3913 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3914  const MCInstrDesc &DefMCID,
3915  unsigned DefClass,
3916  unsigned DefIdx, unsigned DefAlign) const {
3917  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3918  if (RegNo <= 0)
3919  // Def is the address writeback.
3920  return ItinData->getOperandCycle(DefClass, DefIdx);
3921 
3922  int DefCycle;
3923  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3924  // 4 registers would be issued: 1, 2, 1.
3925  // 5 registers would be issued: 1, 2, 2.
3926  DefCycle = RegNo / 2;
3927  if (DefCycle < 1)
3928  DefCycle = 1;
3929  // Result latency is issue cycle + 2: E2.
3930  DefCycle += 2;
3931  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3932  DefCycle = (RegNo / 2);
3933  // If there is an odd number of registers or if it's not 64-bit aligned,
3934  // then it takes an extra AGU (Address Generation Unit) cycle.
3935  if ((RegNo % 2) || DefAlign < 8)
3936  ++DefCycle;
3937  // Result latency is AGU cycles + 2.
3938  DefCycle += 2;
3939  } else {
3940  // Assume the worst.
3941  DefCycle = RegNo + 2;
3942  }
3943 
3944  return DefCycle;
3945 }
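// Editorial worked example: on Cortex-A8/A7, the fourth register loaded by an
// LDM has RegNo = 4, so it issues in cycle 4 / 2 = 2 and its result is ready
// at 2 + 2 = 4. On A9-like cores, five registers with an 8-byte-aligned base
// give 5 / 2 = 2, +1 for the odd count, +2 for the result stage = 5.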
3946 
3947 int
3948 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3949  const MCInstrDesc &UseMCID,
3950  unsigned UseClass,
3951  unsigned UseIdx, unsigned UseAlign) const {
3952  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3953  if (RegNo <= 0)
3954  return ItinData->getOperandCycle(UseClass, UseIdx);
3955 
3956  int UseCycle;
3957  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3958  // (regno / 2) + (regno % 2) + 1
3959  UseCycle = RegNo / 2 + 1;
3960  if (RegNo % 2)
3961  ++UseCycle;
3962  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3963  UseCycle = RegNo;
3964  bool isSStore = false;
3965 
3966  switch (UseMCID.getOpcode()) {
3967  default: break;
3968  case ARM::VSTMSIA:
3969  case ARM::VSTMSIA_UPD:
3970  case ARM::VSTMSDB_UPD:
3971  isSStore = true;
3972  break;
3973  }
3974 
3975  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3976  // then it takes an extra cycle.
3977  if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3978  ++UseCycle;
3979  } else {
3980  // Assume the worst.
3981  UseCycle = RegNo + 2;
3982  }
3983 
3984  return UseCycle;
3985 }
3986 
3987 int
3988 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3989  const MCInstrDesc &UseMCID,
3990  unsigned UseClass,
3991  unsigned UseIdx, unsigned UseAlign) const {
3992  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3993  if (RegNo <= 0)
3994  return ItinData->getOperandCycle(UseClass, UseIdx);
3995 
3996  int UseCycle;
3997  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3998  UseCycle = RegNo / 2;
3999  if (UseCycle < 2)
4000  UseCycle = 2;
4001  // Read in E3.
4002  UseCycle += 2;
4003  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
4004  UseCycle = (RegNo / 2);
4005  // If there is an odd number of registers or if it's not 64-bit aligned,
4006  // then it takes an extra AGU (Address Generation Unit) cycle.
4007  if ((RegNo % 2) || UseAlign < 8)
4008  ++UseCycle;
4009  } else {
4010  // Assume the worst.
4011  UseCycle = 1;
4012  }
4013  return UseCycle;
4014 }
4015 
4016 int
4017 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4018  const MCInstrDesc &DefMCID,
4019  unsigned DefIdx, unsigned DefAlign,
4020  const MCInstrDesc &UseMCID,
4021  unsigned UseIdx, unsigned UseAlign) const {
4022  unsigned DefClass = DefMCID.getSchedClass();
4023  unsigned UseClass = UseMCID.getSchedClass();
4024 
4025  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
4026  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
4027 
4028  // This may be a def / use of a variable_ops instruction, the operand
4029  // latency might be determinable dynamically. Let the target try to
4030  // figure it out.
4031  int DefCycle = -1;
4032  bool LdmBypass = false;
4033  switch (DefMCID.getOpcode()) {
4034  default:
4035  DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4036  break;
4037 
4038  case ARM::VLDMDIA:
4039  case ARM::VLDMDIA_UPD:
4040  case ARM::VLDMDDB_UPD:
4041  case ARM::VLDMSIA:
4042  case ARM::VLDMSIA_UPD:
4043  case ARM::VLDMSDB_UPD:
4044  DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4045  break;
4046 
4047  case ARM::LDMIA_RET:
4048  case ARM::LDMIA:
4049  case ARM::LDMDA:
4050  case ARM::LDMDB:
4051  case ARM::LDMIB:
4052  case ARM::LDMIA_UPD:
4053  case ARM::LDMDA_UPD:
4054  case ARM::LDMDB_UPD:
4055  case ARM::LDMIB_UPD:
4056  case ARM::tLDMIA:
4057  case ARM::tLDMIA_UPD:
4058  case ARM::tPUSH:
4059  case ARM::t2LDMIA_RET:
4060  case ARM::t2LDMIA:
4061  case ARM::t2LDMDB:
4062  case ARM::t2LDMIA_UPD:
4063  case ARM::t2LDMDB_UPD:
4064  LdmBypass = true;
4065  DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4066  break;
4067  }
4068 
4069  if (DefCycle == -1)
4070  // We can't seem to determine the result latency of the def; assume it's 2.
4071  DefCycle = 2;
4072 
4073  int UseCycle = -1;
4074  switch (UseMCID.getOpcode()) {
4075  default:
4076  UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
4077  break;
4078 
4079  case ARM::VSTMDIA:
4080  case ARM::VSTMDIA_UPD:
4081  case ARM::VSTMDDB_UPD:
4082  case ARM::VSTMSIA:
4083  case ARM::VSTMSIA_UPD:
4084  case ARM::VSTMSDB_UPD:
4085  UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4086  break;
4087 
4088  case ARM::STMIA:
4089  case ARM::STMDA:
4090  case ARM::STMDB:
4091  case ARM::STMIB:
4092  case ARM::STMIA_UPD:
4093  case ARM::STMDA_UPD:
4094  case ARM::STMDB_UPD:
4095  case ARM::STMIB_UPD:
4096  case ARM::tSTMIA_UPD:
4097  case ARM::tPOP_RET:
4098  case ARM::tPOP:
4099  case ARM::t2STMIA:
4100  case ARM::t2STMDB:
4101  case ARM::t2STMIA_UPD:
4102  case ARM::t2STMDB_UPD:
4103  UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4104  break;
4105  }
4106 
4107  if (UseCycle == -1)
4108  // Assume it's read in the first stage.
4109  UseCycle = 1;
4110 
4111  UseCycle = DefCycle - UseCycle + 1;
4112  if (UseCycle > 0) {
4113  if (LdmBypass) {
4114  // It's a variable_ops instruction so we can't use DefIdx here. Just use
4115  // first def operand.
4116  if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
4117  UseClass, UseIdx))
4118  --UseCycle;
4119  } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
4120  UseClass, UseIdx)) {
4121  --UseCycle;
4122  }
4123  }
4124 
4125  return UseCycle;
4126 }
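// Editorial worked example: if an LDM produces a register at DefCycle = 4 and
// an STM reads it at UseCycle = 2, the operand latency computed above is
// 4 - 2 + 1 = 3; if the itinerary reports pipeline forwarding (e.g. the LDM
// bypass) into that use stage, it drops to 2.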
4127 
4128 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
4129  const MachineInstr *MI, unsigned Reg,
4130  unsigned &DefIdx, unsigned &Dist) {
4131  Dist = 0;
4132 
4133  MachineBasicBlock::const_iterator I = MI;
4134  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
4135  assert(II->isInsideBundle() && "Empty bundle?");
4136 
4137  int Idx = -1;
4138  while (II->isInsideBundle()) {
4139  Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
4140  if (Idx != -1)
4141  break;
4142  --II;
4143  ++Dist;
4144  }
4145 
4146  assert(Idx != -1 && "Cannot find bundled definition!");
4147  DefIdx = Idx;
4148  return &*II;
4149 }
4150 
4151 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4152  const MachineInstr &MI, unsigned Reg,
4153  unsigned &UseIdx, unsigned &Dist) {
4154  Dist = 0;
4155 
4156  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
4157  assert(II->isInsideBundle() && "Empty bundle?");
4158  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4159 
4160  // FIXME: This doesn't properly handle multiple uses.
4161  int Idx = -1;
4162  while (II != E && II->isInsideBundle()) {
4163  Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
4164  if (Idx != -1)
4165  break;
4166  if (II->getOpcode() != ARM::t2IT)
4167  ++Dist;
4168  ++II;
4169  }
4170 
4171  if (Idx == -1) {
4172  Dist = 0;
4173  return nullptr;
4174  }
4175 
4176  UseIdx = Idx;
4177  return &*II;
4178 }
4179 
4180 /// Return the number of cycles to add to (or subtract from) the static
4181 /// itinerary based on the def opcode and alignment. The caller will ensure that
4182 /// the adjusted latency is at least one cycle.
4183 static int adjustDefLatency(const ARMSubtarget &Subtarget,
4184  const MachineInstr &DefMI,
4185  const MCInstrDesc &DefMCID, unsigned DefAlign) {
4186  int Adjust = 0;
4187  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4188  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4189  // variants are one cycle cheaper.
4190  switch (DefMCID.getOpcode()) {
4191  default: break;
4192  case ARM::LDRrs:
4193  case ARM::LDRBrs: {
4194  unsigned ShOpVal = DefMI.getOperand(3).getImm();
4195  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4196  if (ShImm == 0 ||
4197  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4198  --Adjust;
4199  break;
4200  }
4201  case ARM::t2LDRs:
4202  case ARM::t2LDRBs:
4203  case ARM::t2LDRHs:
4204  case ARM::t2LDRSHs: {
4205  // Thumb2 mode: lsl only.
4206  unsigned ShAmt = DefMI.getOperand(3).getImm();
4207  if (ShAmt == 0 || ShAmt == 2)
4208  --Adjust;
4209  break;
4210  }
4211  }
4212  } else if (Subtarget.isSwift()) {
4213  // FIXME: Properly handle all of the latency adjustments for address
4214  // writeback.
4215  switch (DefMCID.getOpcode()) {
4216  default: break;
4217  case ARM::LDRrs:
4218  case ARM::LDRBrs: {
4219  unsigned ShOpVal = DefMI.getOperand(3).getImm();
4220  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4221  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4222  if (!isSub &&
4223  (ShImm == 0 ||
4224  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4225  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4226  Adjust -= 2;
4227  else if (!isSub &&
4228  ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4229  --Adjust;
4230  break;
4231  }
4232  case ARM::t2LDRs:
4233  case ARM::t2LDRBs:
4234  case ARM::t2LDRHs:
4235  case ARM::t2LDRSHs: {
4236  // Thumb2 mode: lsl only.
4237  unsigned ShAmt = DefMI.getOperand(3).getImm();
4238  if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4239  Adjust -= 2;
4240  break;
4241  }
4242  }
4243  }
4244 
4245  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4246  switch (DefMCID.getOpcode()) {
4247  default: break;
4248  case ARM::VLD1q8:
4249  case ARM::VLD1q16:
4250  case ARM::VLD1q32:
4251  case ARM::VLD1q64:
4252  case ARM::VLD1q8wb_fixed:
4253  case ARM::VLD1q16wb_fixed:
4254  case ARM::VLD1q32wb_fixed:
4255  case ARM::VLD1q64wb_fixed:
4256  case ARM::VLD1q8wb_register:
4257  case ARM::VLD1q16wb_register:
4258  case ARM::VLD1q32wb_register:
4259  case ARM::VLD1q64wb_register:
4260  case ARM::VLD2d8:
4261  case ARM::VLD2d16:
4262  case ARM::VLD2d32:
4263  case ARM::VLD2q8:
4264  case ARM::VLD2q16:
4265  case ARM::VLD2q32:
4266  case ARM::VLD2d8wb_fixed:
4267  case ARM::VLD2d16wb_fixed:
4268  case ARM::VLD2d32wb_fixed:
4269  case ARM::VLD2q8wb_fixed:
4270  case ARM::VLD2q16wb_fixed:
4271  case ARM::VLD2q32wb_fixed:
4272  case ARM::VLD2d8wb_register:
4273  case ARM::VLD2d16wb_register:
4274  case ARM::VLD2d32wb_register:
4275  case ARM::VLD2q8wb_register:
4276  case ARM::VLD2q16wb_register:
4277  case ARM::VLD2q32wb_register:
4278  case ARM::VLD3d8:
4279  case ARM::VLD3d16:
4280  case ARM::VLD3d32:
4281  case ARM::VLD1d64T:
4282  case ARM::VLD3d8_UPD:
4283  case ARM::VLD3d16_UPD:
4284  case ARM::VLD3d32_UPD:
4285  case ARM::VLD1d64Twb_fixed:
4286  case ARM::VLD1d64Twb_register:
4287  case ARM::VLD3q8_UPD:
4288  case ARM::VLD3q16_UPD:
4289  case ARM::VLD3q32_UPD:
4290  case ARM::VLD4d8:
4291  case ARM::VLD4d16:
4292  case ARM::VLD4d32:
4293  case ARM::VLD1d64Q:
4294  case ARM::VLD4d8_UPD:
4295  case ARM::VLD4d16_UPD:
4296  case ARM::VLD4d32_UPD:
4297  case ARM::VLD1d64Qwb_fixed:
4298  case ARM::VLD1d64Qwb_register:
4299  case ARM::VLD4q8_UPD:
4300  case ARM::VLD4q16_UPD:
4301  case ARM::VLD4q32_UPD:
4302  case ARM::VLD1DUPq8:
4303  case ARM::VLD1DUPq16:
4304  case ARM::VLD1DUPq32:
4305  case ARM::VLD1DUPq8wb_fixed:
4306  case ARM::VLD1DUPq16wb_fixed:
4307  case ARM::VLD1DUPq32wb_fixed:
4308  case ARM::VLD1DUPq8wb_register:
4309  case ARM::VLD1DUPq16wb_register:
4310  case ARM::VLD1DUPq32wb_register:
4311  case ARM::VLD2DUPd8:
4312  case ARM::VLD2DUPd16:
4313  case ARM::VLD2DUPd32:
4314  case ARM::VLD2DUPd8wb_fixed:
4315  case ARM::VLD2DUPd16wb_fixed:
4316  case ARM::VLD2DUPd32wb_fixed:
4317  case ARM::VLD2DUPd8wb_register:
4318  case ARM::VLD2DUPd16wb_register:
4319  case ARM::VLD2DUPd32wb_register:
4320  case ARM::VLD4DUPd8:
4321  case ARM::VLD4DUPd16:
4322  case ARM::VLD4DUPd32:
4323  case ARM::VLD4DUPd8_UPD:
4324  case ARM::VLD4DUPd16_UPD:
4325  case ARM::VLD4DUPd32_UPD:
4326  case ARM::VLD1LNd8:
4327  case ARM::VLD1LNd16:
4328  case ARM::VLD1LNd32:
4329  case ARM::VLD1LNd8_UPD:
4330  case ARM::VLD1LNd16_UPD:
4331  case ARM::VLD1LNd32_UPD:
4332  case ARM::VLD2LNd8:
4333  case ARM::VLD2LNd16:
4334  case ARM::VLD2LNd32:
4335  case ARM::VLD2LNq16:
4336  case ARM::VLD2LNq32:
4337  case ARM::VLD2LNd8_UPD:
4338  case ARM::VLD2LNd16_UPD:
4339  case ARM::VLD2LNd32_UPD:
4340  case ARM::VLD2LNq16_UPD:
4341  case ARM::VLD2LNq32_UPD:
4342  case ARM::VLD4LNd8:
4343  case ARM::VLD4LNd16:
4344  case ARM::VLD4LNd32:
4345  case ARM::VLD4LNq16:
4346  case ARM::VLD4LNq32:
4347  case ARM::VLD4LNd8_UPD:
4348  case ARM::VLD4LNd16_UPD:
4349  case ARM::VLD4LNd32_UPD:
4350  case ARM::VLD4LNq16_UPD:
4351  case ARM::VLD4LNq32_UPD:
4352  // If the address is not 64-bit aligned, the latencies of these
4353  // instructions increase by one.
4354  ++Adjust;
4355  break;
4356  }
4357  }
4358  return Adjust;
4359 }
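// Editorial examples of the adjustments above: on Cortex-A8/A9-like cores an
// LDRrs whose shifter operand is a plain register or "lsl #2" is one cycle
// cheaper (Adjust = -1); on Swift the cheap lsl forms save two cycles; and
// when checkVLDnAccessAlignment() holds, a VLD2d8 whose address is not 8-byte
// aligned costs one extra cycle (Adjust = +1).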
4360 
4361 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4362  const MachineInstr &DefMI,
4363  unsigned DefIdx,
4364  const MachineInstr &UseMI,
4365  unsigned UseIdx) const {
4366  // No operand latency. The caller may fall back to getInstrLatency.
4367  if (!ItinData || ItinData->isEmpty())
4368  return -1;
4369 
4370  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4371  Register Reg = DefMO.getReg();
4372 
4373  const MachineInstr *ResolvedDefMI = &DefMI;
4374  unsigned DefAdj = 0;
4375  if (DefMI.isBundle())
4376  ResolvedDefMI =
4377  getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4378  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4379  ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4380  return 1;
4381  }
4382 
4383  const MachineInstr *ResolvedUseMI = &UseMI;
4384  unsigned UseAdj = 0;
4385  if (UseMI.isBundle()) {
4386  ResolvedUseMI =
4387  getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4388  if (!ResolvedUseMI)
4389  return -1;
4390  }
4391 
4392  return getOperandLatencyImpl(
4393  ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4394  Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4395 }
4396 
4397 int ARMBaseInstrInfo::getOperandLatencyImpl(
4398  const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4399  unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4400  const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4401  unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4402  if (Reg == ARM::CPSR) {
4403  if (DefMI.getOpcode() == ARM::FMSTAT) {
4404  // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4405  return Subtarget.isLikeA9() ? 1 : 20;
4406  }
4407 
4408  // CPSR set and branch can be paired in the same cycle.
4409  if (UseMI.isBranch())
4410  return 0;
4411 
4412  // Otherwise it takes the instruction latency (generally one).
4413  unsigned Latency = getInstrLatency(ItinData, DefMI);
4414 
4415  // For Thumb2 and -Os, prefer scheduling the CPSR-setting instruction close
4416  // to its uses. Instructions that are otherwise scheduled between them may
4417  // incur a code-size penalty (they cannot use the CPSR-setting 16-bit
4418  // instructions).
4419  if (Latency > 0 && Subtarget.isThumb2()) {
4420  const MachineFunction *MF = DefMI.getParent()->getParent();
4421  // FIXME: Use Function::hasOptSize().
4422  if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4423  --Latency;
4424  }
4425  return Latency;
4426  }
4427 
4428  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4429  return -1;
4430 
4431  unsigned DefAlign = DefMI.hasOneMemOperand()
4432  ? (*DefMI.memoperands_begin())->getAlign().value()
4433  : 0;
4434  unsigned UseAlign = UseMI.hasOneMemOperand()
4435  ? (*UseMI.memoperands_begin())->getAlign().value()
4436  : 0;
4437 
4438  // Get the itinerary's latency if possible, and handle variable_ops.
4439  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
4440  UseIdx, UseAlign);
4441  // Unable to find operand latency. The caller may resort to getInstrLatency.
4442  if (Latency < 0)
4443  return Latency;
4444 
4445  // Adjust for IT block position.
4446  int Adj = DefAdj + UseAdj;
4447 
4448  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4449  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4450  if (Adj >= 0 || (int)Latency > -Adj) {
4451  return Latency + Adj;
4452  }
4453  // Return the itinerary latency, which may be zero but not less than zero.
4454  return Latency;
4455 }
4456 
4457 int
4458 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4459  SDNode *DefNode, unsigned DefIdx,
4460  SDNode *UseNode, unsigned UseIdx) const {
4461  if (!DefNode->isMachineOpcode())
4462  return 1;
4463 
4464  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4465 
4466  if (isZeroCost(DefMCID.Opcode))
4467  return 0;
4468 
4469  if (!ItinData || ItinData->isEmpty())
4470  return DefMCID.mayLoad() ? 3 : 1;
4471 
4472  if (!UseNode->isMachineOpcode()) {
4473  int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4474  int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4475  int Threshold = 1 + Adj;
4476  return Latency <= Threshold ? 1 : Latency - Adj;
4477  }
4478 
4479  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4480  auto *DefMN = cast<MachineSDNode>(DefNode);
4481  unsigned DefAlign = !DefMN->memoperands_empty()
4482  ? (*DefMN->memoperands_begin())->getAlign().value()
4483  : 0;
4484  auto *UseMN = cast<MachineSDNode>(UseNode);
4485  unsigned UseAlign = !UseMN->memoperands_empty()
4486  ? (*UseMN->memoperands_begin())->getAlign().value()
4487  : 0;
4488  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4489  UseMCID, UseIdx, UseAlign);
4490 
4491  if (Latency > 1 &&
4492  (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4493  Subtarget.isCortexA7())) {
4494  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4495  // variants are one cycle cheaper.
4496  switch (DefMCID.getOpcode()) {
4497  default: break;
4498  case ARM::LDRrs:
4499  case ARM::LDRBrs: {
4500  unsigned ShOpVal =
4501  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4502  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4503  if (ShImm == 0 ||
4504  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4505  --Latency;
4506  break;
4507  }
4508  case ARM::t2LDRs:
4509  case ARM::t2LDRBs:
4510  case ARM::t2LDRHs:
4511  case ARM::t2LDRSHs: {
4512  // Thumb2 mode: lsl only.
4513  unsigned ShAmt =
4514  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4515  if (ShAmt == 0 || ShAmt == 2)
4516  --Latency;
4517  break;
4518  }
4519  }
4520  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4521  // FIXME: Properly handle all of the latency adjustments for address
4522  // writeback.
4523  switch (DefMCID.getOpcode()) {
4524  default: break;
4525  case ARM::LDRrs:
4526  case ARM::LDRBrs: {
4527  unsigned ShOpVal =
4528  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4529  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4530  if (ShImm == 0 ||
4531  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4532  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4533  Latency -= 2;
4534  else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4535  --Latency;
4536  break;
4537  }
4538  case ARM::t2LDRs:
4539  case ARM::t2LDRBs:
4540  case ARM::t2LDRHs:
4541  case ARM::t2LDRSHs:
4542  // Thumb2 mode: lsl 0-3 only.
4543  Latency -= 2;
4544  break;
4545  }
4546  }
4547 
4548  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4549  switch (DefMCID.getOpcode()) {
4550  default: break;
4551  case ARM::VLD1q8:
4552  case ARM::VLD1q16:
4553  case ARM::VLD1q32:
4554  case ARM::VLD1q64:
4555  case ARM::VLD1q8wb_register:
4556  case ARM::VLD1q16wb_register:
4557  case ARM::VLD1q32wb_register:
4558  case ARM::VLD1q64wb_register:
4559  case ARM::VLD1q8wb_fixed:
4560  case ARM::VLD1q16wb_fixed:
4561  case ARM::VLD1q32wb_fixed:
4562  case ARM::VLD1q64wb_fixed:
4563  case ARM::VLD2d8:
4564  case ARM::VLD2d16:
4565  case ARM::VLD2d32:
4566  case ARM::VLD2q8Pseudo:
4567  case ARM::VLD2q16Pseudo:
4568  case ARM::VLD2q32Pseudo:
4569  case ARM::VLD2d8wb_fixed:
4570  case ARM::VLD2d16wb_fixed:
4571  case ARM::VLD2d32wb_fixed:
4572  case ARM::VLD2q8PseudoWB_fixed:
4573  case ARM::VLD2q16PseudoWB_fixed:
4574  case ARM::VLD2q32PseudoWB_fixed:
4575  case ARM::VLD2d8wb_register:
4576  case ARM::VLD2d16wb_register:
4577  case ARM::VLD2d32wb_register:
4578  case ARM::VLD2q8PseudoWB_register:
4579  case ARM::VLD2q16PseudoWB_register:
4580  case ARM::VLD2q32PseudoWB_register:
4581  case ARM::VLD3d8Pseudo:
4582  case ARM::VLD3d16Pseudo:
4583  case ARM::VLD3d32Pseudo:
4584  case ARM::VLD1d8TPseudo:
4585  case ARM::VLD1d16TPseudo:
4586  case ARM::VLD1d32TPseudo:
4587  case ARM::VLD1d64TPseudo:
4588  case ARM::VLD1d64TPseudoWB_fixed:
4589  case ARM::VLD1d64TPseudoWB_register:
4590  case ARM::VLD3d8Pseudo_UPD:
4591  case ARM::VLD3d16Pseudo_UPD:
4592  case ARM::VLD3d32Pseudo_UPD:
4593  case ARM::VLD3q8Pseudo_UPD:
4594  case ARM::VLD3q16Pseudo_UPD:
4595  case ARM::VLD3q32Pseudo_UPD:
4596  case ARM::VLD3q8oddPseudo:
4597  case ARM::VLD3q16oddPseudo:
4598  case ARM::VLD3q32oddPseudo:
4599  case ARM::VLD3q8oddPseudo_UPD:
4600  case ARM::VLD3q16oddPseudo_UPD:
4601  case ARM::VLD3q32oddPseudo_UPD:
4602  case ARM::VLD4d8Pseudo:
4603  case ARM::VLD4d16Pseudo:
4604  case ARM::VLD4d32Pseudo:
4605  case ARM::VLD1d8QPseudo:
4606  case ARM::VLD1d16QPseudo:
4607  case ARM::VLD1d32QPseudo:
4608  case ARM::VLD1d64QPseudo:
4609  case ARM::VLD1d64QPseudoWB_fixed:
4610  case ARM::VLD1d64QPseudoWB_register:
4611  case ARM::VLD1q8HighQPseudo:
4612  case ARM::VLD1q8LowQPseudo_UPD:
4613  case ARM::VLD1q8HighTPseudo:
4614  case ARM::VLD1q8LowTPseudo_UPD:
4615  case ARM::VLD1q16HighQPseudo:
4616  case ARM::VLD1q16LowQPseudo_UPD:
4617  case ARM::VLD1q16HighTPseudo:
4618  case ARM::VLD1q16LowTPseudo_UPD:
4619  case ARM::VLD1q32HighQPseudo:
4620  case ARM::VLD1q32LowQPseudo_UPD:
4621  case ARM::VLD1q32HighTPseudo:
4622  case ARM::VLD1q32LowTPseudo_UPD:
4623  case ARM::VLD1q64HighQPseudo:
4624  case ARM::VLD1q64LowQPseudo_UPD:
4625  case ARM::VLD1q64HighTPseudo:
4626  case ARM::VLD1q64LowTPseudo_UPD:
4627  case ARM::VLD4d8Pseudo_UPD:
4628  case ARM::VLD4d16Pseudo_UPD:
4629  case ARM::VLD4d32Pseudo_UPD:
4630  case ARM::VLD4q8Pseudo_UPD:
4631  case ARM::VLD4q16Pseudo_UPD:
4632  case ARM::VLD4q32Pseudo_UPD:
4633  case ARM::VLD4q8oddPseudo:
4634  case ARM::VLD4q16oddPseudo:
4635  case ARM::VLD4q32oddPseudo:
4636  case ARM::VLD4q8oddPseudo_UPD:
4637  case ARM::VLD4q16oddPseudo_UPD:
4638  case ARM::VLD4q32oddPseudo_UPD:
4639  case ARM::VLD1DUPq8:
4640  case ARM::VLD1DUPq16:
4641  case ARM::VLD1DUPq32:
4642  case ARM::VLD1DUPq8wb_fixed:
4643  case ARM::VLD1DUPq16wb_fixed:
4644  case ARM::VLD1DUPq32wb_fixed:
4645  case ARM::VLD1DUPq8wb_register:
4646  case ARM::VLD1DUPq16wb_register:
4647  case ARM::VLD1DUPq32wb_register:
4648  case ARM::VLD2DUPd8:
4649  case ARM::VLD2DUPd16:
4650  case ARM::VLD2DUPd32:
4651  case ARM::VLD2DUPd8wb_fixed:
4652  case ARM::VLD2DUPd16wb_fixed:
4653  case ARM::VLD2DUPd32wb_fixed:
4654  case ARM::VLD2DUPd8wb_register:
4655  case ARM::VLD2DUPd16wb_register:
4656  case ARM::VLD2DUPd32wb_register:
4657  case ARM::VLD2DUPq8EvenPseudo:
4658  case ARM::VLD2DUPq8OddPseudo:
4659  case ARM::VLD2DUPq16EvenPseudo:
4660  case ARM::VLD2DUPq16OddPseudo:
4661  case ARM::VLD2DUPq32EvenPseudo:
4662  case ARM::VLD2DUPq32OddPseudo:
4663  case ARM::VLD3DUPq8EvenPseudo:
4664  case ARM::VLD3DUPq8OddPseudo:
4665  case ARM::VLD3DUPq16EvenPseudo:
4666  case ARM::VLD3DUPq16OddPseudo:
4667  case ARM::VLD3DUPq32EvenPseudo:
4668  case ARM::VLD3DUPq32OddPseudo:
4669  case ARM::VLD4DUPd8Pseudo:
4670  case ARM::VLD4DUPd16Pseudo:
4671  case ARM::VLD4DUPd32Pseudo:
4672  case ARM::VLD4DUPd8Pseudo_UPD:
4673  case ARM::VLD4DUPd16Pseudo_UPD:
4674  case ARM::VLD4DUPd32Pseudo_UPD:
4675  case ARM::VLD4DUPq8EvenPseudo:
4676  case ARM::VLD4DUPq8OddPseudo:
4677  case ARM::VLD4DUPq16EvenPseudo:
4678  case ARM::VLD4DUPq16OddPseudo:
4679  case ARM::VLD4DUPq32EvenPseudo:
4680  case ARM::VLD4DUPq32OddPseudo:
4681  case ARM::VLD1LNq8Pseudo:
4682  case ARM::VLD1LNq16Pseudo:
4683  case ARM::VLD1LNq32Pseudo:
4684  case ARM::VLD1LNq8Pseudo_UPD:
4685  case ARM::VLD1LNq16Pseudo_UPD:
4686  case ARM::VLD1LNq32Pseudo_UPD:
4687  case ARM::VLD2LNd8Pseudo:
4688  case ARM::VLD2LNd16Pseudo:
4689  case ARM::VLD2LNd32Pseudo:
4690  case ARM::VLD2LNq16Pseudo:
4691  case ARM::VLD2LNq32Pseudo:
4692  case ARM::VLD2LNd8Pseudo_UPD:
4693  case ARM::VLD2LNd16Pseudo_UPD:
4694  case ARM::VLD2LNd32Pseudo_UPD:
4695  case ARM::VLD2LNq16Pseudo_UPD:
4696  case ARM::VLD2LNq32Pseudo_UPD:
4697  case ARM::VLD4LNd8Pseudo:
4698  case ARM::VLD4LNd16Pseudo:
4699  case ARM::VLD4LNd32Pseudo:
4700  case ARM::VLD4LNq16Pseudo:
4701  case ARM::VLD4LNq32Pseudo:
4702  case ARM::VLD4LNd8Pseudo_UPD:
4703  case ARM::VLD4LNd16Pseudo_UPD:
4704  case ARM::VLD4LNd32Pseudo_UPD:
4705  case ARM::VLD4LNq16Pseudo_UPD:
4706  case ARM::VLD4LNq32Pseudo_UPD:
4707  // If the address is not 64-bit aligned, the latencies of these
4708  // instructions increase by one.
4709  ++Latency;
4710  break;
4711  }
4712 
4713  return Latency;
4714 }
4715 
4716 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4717  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4718  MI.isImplicitDef())
4719  return 0;
4720 
4721  if (MI.isBundle())
4722  return 0;
4723 
4724  const MCInstrDesc &MCID = MI.getDesc();
4725 
4726  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4727  !Subtarget.cheapPredicableCPSRDef())) {
4728  // When predicated, CPSR is an additional source operand for CPSR-updating
4729  // instructions, which apparently increases their latencies.
4730  return 1;
4731  }
4732  return 0;
4733 }
4734 
4735 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4736  const MachineInstr &MI,
4737  unsigned *PredCost) const {
4738  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4739  MI.isImplicitDef())
4740  return 1;
4741 
4742  // An instruction scheduler typically runs on unbundled instructions; however,
4743  // other passes may query the latency of a bundled instruction.
4744  if (MI.isBundle()) {
4745  unsigned Latency = 0;
4746  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4747  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4748  while (++I != E && I->isInsideBundle()) {
4749  if (I->getOpcode() != ARM::t2IT)
4750  Latency += getInstrLatency(ItinData, *I, PredCost);
4751  }
4752  return Latency;
4753  }
4754 
4755  const MCInstrDesc &MCID = MI.getDesc();
4756  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4757  !Subtarget.cheapPredicableCPSRDef()))) {
4758  // When predicated, CPSR is an additional source operand for CPSR-updating
4759  // instructions, which apparently increases their latencies.
4760  *PredCost = 1;
4761  }
4762  // Be sure to call getStageLatency for an empty itinerary in case it has a
4763  // valid MinLatency property.
4764  if (!ItinData)
4765  return MI.mayLoad() ? 3 : 1;
4766 
4767  unsigned Class = MCID.getSchedClass();
4768 
4769  // For instructions with variable uops, use uops as latency.
4770  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4771  return getNumMicroOps(ItinData, MI);
4772 
4773  // For the common case, fall back on the itinerary's latency.
4774  unsigned Latency = ItinData->getStageLatency(Class);
4775 
4776  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4777  unsigned DefAlign =
4778  MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4779  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4780  if (Adj >= 0 || (int)Latency > -Adj) {
4781  return Latency + Adj;
4782  }
4783  return Latency;
4784 }
4785 
4786 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4787  SDNode *Node) const {
4788  if (!Node->isMachineOpcode())
4789  return 1;
4790 
4791  if (!ItinData || ItinData->isEmpty())
4792  return 1;
4793 
4794  unsigned Opcode = Node->getMachineOpcode();
4795  switch (Opcode) {
4796  default:
4797  return ItinData->getStageLatency(get(Opcode).getSchedClass());
4798  case ARM::VLDMQIA:
4799  case ARM::VSTMQIA:
4800  return 2;
4801  }
4802 }
4803 
4804 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4805  const MachineRegisterInfo *MRI,
4806  const MachineInstr &DefMI,
4807  unsigned DefIdx,
4808  const MachineInstr &UseMI,
4809  unsigned UseIdx) const {
4810  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4811  unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4812  if (Subtarget.nonpipelinedVFP() &&
4813  (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4814  return true;
4815 
4816  // Hoist VFP / NEON instructions with 4 or higher latency.
4817  unsigned Latency =
4818  SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4819  if (Latency <= 3)
4820  return false;
4821  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4822  UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4823 }
4824 
4825 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4826  const MachineInstr &DefMI,
4827  unsigned DefIdx) const {
4828  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4829  if (!ItinData || ItinData->isEmpty())
4830  return false;
4831 
4832  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4833  if (DDomain == ARMII::DomainGeneral) {
4834  unsigned DefClass = DefMI.getDesc().getSchedClass();
4835  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4836  return (DefCycle != -1 && DefCycle <= 2);
4837  }
4838  return false;
4839 }
4840 
4841 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4842  StringRef &ErrInfo) const {
4843  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4844  ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4845  return false;
4846  }
4847  if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4848  // Make sure we don't generate a lo-lo mov that isn't supported.
4849  if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4850  !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4851  ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4852  return false;
4853  }
4854  }
4855  if (MI.getOpcode() == ARM::tPUSH ||
4856  MI.getOpcode() == ARM::tPOP ||
4857  MI.getOpcode() == ARM::tPOP_RET) {
4858  for (int i = 2, e = MI.getNumOperands(); i < e; ++i) {
4859  if (MI.getOperand(i).isImplicit() ||
4860  !MI.getOperand(i).isReg())
4861  continue;
4862  Register Reg = MI.getOperand(i).getReg();
4863  if (Reg < ARM::R0 || Reg > ARM::R7) {
4864  if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4865  !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4866  ErrInfo = "Unsupported register in Thumb1 push/pop";
4867  return false;
4868  }
4869  }
4870  }
4871  }
4872  if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4873  assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4874  if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4875  MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4876  ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4877  return false;
4878  }
4879  }
4880  return true;
4881 }
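// Editorial example: "tPUSH {r4, r8}" fails verification here because r8 is
// outside r0-r7 and is not the lr special case, whereas "tPUSH {r4, lr}" and
// "tPOP_RET {r4, pc}" are accepted.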
4882 
4883 // LoadStackGuard has so far only been implemented for MachO. A different code
4884 // sequence is needed for other targets.
4885 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4886  unsigned LoadImmOpc,
4887  unsigned LoadOpc) const {
4888  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4889  "ROPI/RWPI not currently supported with stack guard");
4890 
4891  MachineBasicBlock &MBB = *MI->getParent();
4892  DebugLoc DL = MI->getDebugLoc();
4893  Register Reg = MI->getOperand(0).getReg();
4894  const GlobalValue *GV =
4895  cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4896  MachineInstrBuilder MIB;
4897 
4898  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4899  .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
4900 
4901  if (Subtarget.isGVIndirectSymbol(GV)) {
4902  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4903  MIB.addReg(Reg, RegState::Kill).addImm(0);
4904  auto Flags = MachineMemOperand::MOLoad |
4905  MachineMemOperand::MODereferenceable |
4906  MachineMemOperand::MOInvariant;
4907  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4908  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4909  MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4910  }
4911 
4912  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4913  MIB.addReg(Reg, RegState::Kill)
4914  .addImm(0)
4915  .cloneMemRefs(*MI)
4916  .add(predOps(ARMCC::AL));
4917 }
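// Editorial illustration of the expansion above (exact opcodes depend on the
// LoadImmOpc/LoadOpc the caller passes in):
//   %reg = <LoadImmOpc> @guard_global   ; materialize the guard's address
//   %reg = <LoadOpc>    %reg, 0         ; extra hop, only for indirect
//                                       ; (GOT / non-lazy pointer) symbols
//   %reg = <LoadOpc>    %reg, 0         ; final load, cloning the pseudo's memrefs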
4918 
4919 bool
4920 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4921  unsigned &AddSubOpc,
4922  bool &NegAcc, bool &HasLane) const {
4923  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4924  if (I == MLxEntryMap.end())
4925  return false;
4926 
4927  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4928  MulOpc = Entry.MulOpc;
4929  AddSubOpc = Entry.AddSubOpc;
4930  NegAcc = Entry.NegAcc;
4931  HasLane = Entry.HasLane;
4932  return true;
4933 }
4934 
4935 //===----------------------------------------------------------------------===//
4936 // Execution domains.
4937 //===----------------------------------------------------------------------===//
4938 //
4939 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4940 // and some can go down both. The vmov instructions go down the VFP pipeline,
4941 // but they can be changed to vorr equivalents that are executed by the NEON
4942 // pipeline.
4943 //
4944 // We use the following execution domain numbering:
4945 //
4946 enum ARMExeDomain {
4947  ExeGeneric = 0,
4948  ExeVFP = 1,
4949  ExeNEON = 2
4950 };
4951 
4952 //
4953 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4954 //
4955 std::pair<uint16_t, uint16_t>
4956 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4957  // If we don't have access to NEON instructions then we won't be able
4958  // to swizzle anything to the NEON domain. Check to make sure.
4959  if (Subtarget.hasNEON()) {
4960  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4961  // if they are not predicated.
4962  if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4963  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4964 
4965  // CortexA9 is particularly picky about mixing the two and wants these
4966  // converted.
4967  if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4968  (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4969  MI.getOpcode() == ARM::VMOVS))
4970  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4971  }
4972  // No other instructions can be swizzled, so just determine their domain.
4973  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4974 
4975  if (Domain & ARMII::DomainNEON)
4976  return std::make_pair(ExeNEON, 0);
4977 
4978  // Certain instructions can go either way on Cortex-A8.
4979  // Treat them as NEON instructions.
4980  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4981  return std::make_pair(ExeNEON, 0);
4982 
4983  if (Domain & ARMII::DomainVFP)
4984  return std::make_pair(ExeVFP, 0);
4985 
4986  return std::make_pair(ExeGeneric, 0);
4987 }
4988 
4989 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4990  unsigned SReg, unsigned &Lane) {
4991  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4992  Lane = 0;
4993 
4994  if (DReg != ARM::NoRegister)
4995  return DReg;
4996 
4997  Lane = 1;
4998  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4999 
5000  assert(DReg && "S-register with no D super-register?");
5001  return DReg;
5002 }
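// Editorial example: S4 is the ssub_0 half of D2, so the result is D2 with
// Lane = 0; S1 is the ssub_1 half of D0, so the result is D0 with Lane = 1.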
5003 
5004 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
5005 /// set ImplicitSReg to a register number that must be marked as implicit-use or
5006 /// zero if no register needs to be defined as implicit-use.
5007 ///
5008 /// If the function cannot determine if an SPR should be marked implicit use or
5009 /// not, it returns false.
5010 ///
5011 /// This function handles cases where an instruction is being modified from taking
5012 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
5013 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
5014 /// lane of the DPR).
5015 ///
5016 /// If the other SPR is defined, an implicit-use of it should be added. Otherwise
5017 /// (including the case where the DPR itself is defined), it should not.
5018 ///
5019 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
5020  MachineInstr &MI, unsigned DReg,
5021  unsigned Lane, unsigned &ImplicitSReg) {
5022  // If the DPR is defined or used already, the other SPR lane will be chained
5023  // correctly, so there is nothing to be done.
5024  if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
5025  ImplicitSReg = 0;
5026  return true;
5027  }
5028 
5029  // Otherwise we need to go searching to see if the SPR is set explicitly.
5030  ImplicitSReg = TRI->getSubReg(DReg,
5031  (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
5032  MachineBasicBlock::LivenessQueryResult LQR =
5033  MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
5034 
5035  if (LQR == MachineBasicBlock::LQR_Live)
5036  return true;
5037  else if (LQR == MachineBasicBlock::LQR_Unknown)
5038  return false;
5039 
5040  // If the register is known not to be live, there is no need to add an
5041  // implicit-use.
5042  ImplicitSReg = 0;
5043  return true;
5044 }
5045 
5046 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
5047  unsigned Domain) const {
5048  unsigned DstReg, SrcReg, DReg;
5049  unsigned Lane;
5050  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
5051  const TargetRegisterInfo *TRI = &getRegisterInfo();
5052  switch (MI.getOpcode()) {
5053  default:
5054  llvm_unreachable("cannot handle opcode!");
5055  break;
5056  case ARM::VMOVD:
5057  if (Domain != ExeNEON)
5058  break;
5059 
5060  // Zap the predicate operands.
5061  assert(!isPredicated(MI) && "Cannot predicate a VORRd");
5062 
5063  // Make sure we've got NEON instructions.
5064  assert(Subtarget.hasNEON() && "VORRd requires NEON");
5065 
5066  // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
5067  DstReg = MI.getOperand(0).getReg();
5068  SrcReg = MI.getOperand(1).getReg();
5069 
5070  for (unsigned i =