1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the Base ARM implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMBaseInstrInfo.h"
14 #include "ARMBaseRegisterInfo.h"
15 #include "ARMConstantPoolValue.h"
16 #include "ARMFeatures.h"
17 #include "ARMHazardRecognizer.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
22 #include "MVETailPredUtils.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Triple.h"
46 #include "llvm/IR/Attributes.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DebugLoc.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/MC/MCAsmInfo.h"
52 #include "llvm/MC/MCInstrDesc.h"
55 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/Compiler.h"
58 #include "llvm/Support/Debug.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <iterator>
66 #include <new>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "arm-instrinfo"
73 
74 #define GET_INSTRINFO_CTOR_DTOR
75 #include "ARMGenInstrInfo.inc"
76 
77 static cl::opt<bool>
78 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
79  cl::desc("Enable ARM 2-addr to 3-addr conv"));
80 
81 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
82 struct ARM_MLxEntry {
83  uint16_t MLxOpc; // MLA / MLS opcode
84  uint16_t MulOpc; // Expanded multiplication opcode
85  uint16_t AddSubOpc; // Expanded add / sub opcode
86  bool NegAcc; // True if the acc is negated before the add / sub.
87  bool HasLane; // True if instruction has an extra "lane" operand.
88 };
89 
90 static const ARM_MLxEntry ARM_MLxTable[] = {
91  // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
92  // fp scalar ops
93  { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
94  { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
95  { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
96  { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
97  { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
98  { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
99  { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
100  { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
101 
102  // fp SIMD ops
103  { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
104  { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
105  { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
106  { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
107  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
108  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
109  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
110  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
111 };
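// For illustration (hypothetical registers; this sequence is not emitted
// verbatim by the table machinery): an entry such as
//   { VMLAS, VMULS, VADDS, NegAcc=false, HasLane=false }
// describes rewriting the fused form
//   vmla.f32 s0, s1, s2        ; s0 += s1 * s2
// into the two-instruction sequence
//   vmul.f32 s3, s1, s2
//   vadd.f32 s0, s0, s3
// which the FP MLx hazard logic can fall back on to avoid accumulator
// forwarding stalls.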
112 
113 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
114  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
115  Subtarget(STI) {
116  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
117  if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
118  llvm_unreachable("Duplicated entries?");
119  MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
120  MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
121  }
122 }
123 
124 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrInfo
125 // currently defaults to no prepass hazard recognizer.
128  const ScheduleDAG *DAG) const {
129  if (usePreRAHazardRecognizer()) {
130  const InstrItineraryData *II =
131  static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
132  return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
133  }
134  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
135 }
136 
137 // Called during:
138 // - pre-RA scheduling
139 // - post-RA scheduling when FeatureUseMISched is set
141  const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
143 
144  // We would like to restrict this hazard recognizer to only
145  // post-RA scheduling; we can tell that we're post-RA because we don't
146  // track VRegLiveness.
147  // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
148  // banks banked on bit 2. Assume that TCMs are in use.
149  if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
150  MHR->AddHazardRecognizer(
151  std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
152 
153  // Not inserting ARMHazardRecognizerFPMLx because that would change
154  // legacy behavior
155 
157  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
158  return MHR;
159 }
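// For illustration of the 0x4 bank mask above (assumed interpretation of the
// Cortex-M7 TRM note): two DTCM accesses whose addresses differ in bit 2,
// e.g. 0x20000000 and 0x20000004, fall into different banks and can proceed
// in parallel, while accesses that agree in bit 2 contend for the same bank.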
160 
161 // Called during post-RA scheduling when FeatureUseMISched is not set
164  const ScheduleDAG *DAG) const {
166 
167  if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
168  MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
169 
171  if (BHR)
172  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
173  return MHR;
174 }
175 
176 MachineInstr *
178  LiveIntervals *LIS) const {
179  // FIXME: Thumb2 support.
180 
181  if (!EnableARM3Addr)
182  return nullptr;
183 
184  MachineFunction &MF = *MI.getParent()->getParent();
185  uint64_t TSFlags = MI.getDesc().TSFlags;
186  bool isPre = false;
187  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
188  default: return nullptr;
189  case ARMII::IndexModePre:
190  isPre = true;
191  break;
192  case ARMII::IndexModePost:
193  break;
194  }
195 
196  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
197  // operation.
198  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
199  if (MemOpc == 0)
200  return nullptr;
201 
202  MachineInstr *UpdateMI = nullptr;
203  MachineInstr *MemMI = nullptr;
204  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
205  const MCInstrDesc &MCID = MI.getDesc();
206  unsigned NumOps = MCID.getNumOperands();
207  bool isLoad = !MI.mayStore();
208  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
209  const MachineOperand &Base = MI.getOperand(2);
210  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
211  Register WBReg = WB.getReg();
212  Register BaseReg = Base.getReg();
213  Register OffReg = Offset.getReg();
214  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
215  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
216  switch (AddrMode) {
217  default: llvm_unreachable("Unknown indexed op!");
218  case ARMII::AddrMode2: {
219  bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
220  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
221  if (OffReg == 0) {
222  if (ARM_AM::getSOImmVal(Amt) == -1)
223  // Can't encode it in a so_imm operand. This transformation will
224  // add more than 1 instruction. Abandon!
225  return nullptr;
226  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
227  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
228  .addReg(BaseReg)
229  .addImm(Amt)
230  .add(predOps(Pred))
231  .add(condCodeOp());
232  } else if (Amt != 0) {
234  unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
235  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
236  get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
237  .addReg(BaseReg)
238  .addReg(OffReg)
239  .addReg(0)
240  .addImm(SOOpc)
241  .add(predOps(Pred))
242  .add(condCodeOp());
243  } else
244  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
245  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
246  .addReg(BaseReg)
247  .addReg(OffReg)
248  .add(predOps(Pred))
249  .add(condCodeOp());
250  break;
251  }
252  case ARMII::AddrMode3 : {
253  bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
254  unsigned Amt = ARM_AM::getAM3Offset(OffImm);
255  if (OffReg == 0)
256  // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
257  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
258  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
259  .addReg(BaseReg)
260  .addImm(Amt)
261  .add(predOps(Pred))
262  .add(condCodeOp());
263  else
264  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
265  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
266  .addReg(BaseReg)
267  .addReg(OffReg)
268  .add(predOps(Pred))
269  .add(condCodeOp());
270  break;
271  }
272  }
273 
274  std::vector<MachineInstr*> NewMIs;
275  if (isPre) {
276  if (isLoad)
277  MemMI =
278  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
279  .addReg(WBReg)
280  .addImm(0)
281  .addImm(Pred);
282  else
283  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
284  .addReg(MI.getOperand(1).getReg())
285  .addReg(WBReg)
286  .addReg(0)
287  .addImm(0)
288  .addImm(Pred);
289  NewMIs.push_back(MemMI);
290  NewMIs.push_back(UpdateMI);
291  } else {
292  if (isLoad)
293  MemMI =
294  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
295  .addReg(BaseReg)
296  .addImm(0)
297  .addImm(Pred);
298  else
299  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
300  .addReg(MI.getOperand(1).getReg())
301  .addReg(BaseReg)
302  .addReg(0)
303  .addImm(0)
304  .addImm(Pred);
305  if (WB.isDead())
306  UpdateMI->getOperand(0).setIsDead();
307  NewMIs.push_back(UpdateMI);
308  NewMIs.push_back(MemMI);
309  }
310 
311  // Transfer LiveVariables states, kill / dead info.
312  if (LV) {
313  for (const MachineOperand &MO : MI.operands()) {
314  if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
315  Register Reg = MO.getReg();
316 
317  LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
318  if (MO.isDef()) {
319  MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
320  if (MO.isDead())
321  LV->addVirtualRegisterDead(Reg, *NewMI);
322  }
323  if (MO.isUse() && MO.isKill()) {
324  for (unsigned j = 0; j < 2; ++j) {
325  // Look at the two new MI's in reverse order.
326  MachineInstr *NewMI = NewMIs[j];
327  if (!NewMI->readsRegister(Reg))
328  continue;
329  LV->addVirtualRegisterKilled(Reg, *NewMI);
330  if (VI.removeKill(MI))
331  VI.Kills.push_back(NewMI);
332  break;
333  }
334  }
335  }
336  }
337  }
338 
339  MachineBasicBlock &MBB = *MI.getParent();
340  MBB.insert(MI, NewMIs[1]);
341  MBB.insert(MI, NewMIs[0]);
342  return NewMIs[0];
343 }
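// For illustration of the split performed above (assumed ARM assembly syntax):
//   pre-indexed    ldr r0, [r1, #4]!   =>   add r1, r1, #4 ; ldr r0, [r1]
//   post-indexed   ldr r0, [r1], #4    =>   ldr r0, [r1]   ; add r1, r1, #4
// The writeback register receives the add/sub result in both cases; only the
// base register used by the new memory access differs.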
344 
345 // Branch analysis.
347  MachineBasicBlock *&TBB,
348  MachineBasicBlock *&FBB,
350  bool AllowModify) const {
351  TBB = nullptr;
352  FBB = nullptr;
353 
355  if (I == MBB.instr_begin())
356  return false; // Empty blocks are easy.
357  --I;
358 
359  // Walk backwards from the end of the basic block until the branch is
360  // analyzed or we give up.
361  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
362  // Flag to be raised on unanalyzeable instructions. This is useful in cases
363  // where we want to clean up on the end of the basic block before we bail
364  // out.
365  bool CantAnalyze = false;
366 
367  // Skip over DEBUG values, predicated nonterminators and speculation
368  // barrier terminators.
369  while (I->isDebugInstr() || !I->isTerminator() ||
370  isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
371  I->getOpcode() == ARM::t2DoLoopStartTP){
372  if (I == MBB.instr_begin())
373  return false;
374  --I;
375  }
376 
377  if (isIndirectBranchOpcode(I->getOpcode()) ||
378  isJumpTableBranchOpcode(I->getOpcode())) {
379  // Indirect branches and jump tables can't be analyzed, but we still want
380  // to clean up any instructions at the tail of the basic block.
381  CantAnalyze = true;
382  } else if (isUncondBranchOpcode(I->getOpcode())) {
383  TBB = I->getOperand(0).getMBB();
384  } else if (isCondBranchOpcode(I->getOpcode())) {
385  // Bail out if we encounter multiple conditional branches.
386  if (!Cond.empty())
387  return true;
388 
389  assert(!FBB && "FBB should have been null.");
390  FBB = TBB;
391  TBB = I->getOperand(0).getMBB();
392  Cond.push_back(I->getOperand(1));
393  Cond.push_back(I->getOperand(2));
394  } else if (I->isReturn()) {
395  // Returns can't be analyzed, but we should run cleanup.
396  CantAnalyze = true;
397  } else {
398  // We encountered an unrecognized terminator. Bail out immediately.
399  return true;
400  }
401 
402  // Cleanup code - to be run for unpredicated unconditional branches and
403  // returns.
404  if (!isPredicated(*I) &&
405  (isUncondBranchOpcode(I->getOpcode()) ||
406  isIndirectBranchOpcode(I->getOpcode()) ||
407  isJumpTableBranchOpcode(I->getOpcode()) ||
408  I->isReturn())) {
409  // Forget any previous conditional branch information - it no longer applies.
410  Cond.clear();
411  FBB = nullptr;
412 
413  // If we can modify the function, delete everything below this
414  // unconditional branch.
415  if (AllowModify) {
416  MachineBasicBlock::iterator DI = std::next(I);
417  while (DI != MBB.instr_end()) {
418  MachineInstr &InstToDelete = *DI;
419  ++DI;
420  // Speculation barriers must not be deleted.
421  if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
422  continue;
423  InstToDelete.eraseFromParent();
424  }
425  }
426  }
427 
428  if (CantAnalyze) {
429  // We may not be able to analyze the block, but we could still have
430  // an unconditional branch as the last instruction in the block, which
431  // just branches to layout successor. If this is the case, then just
432  // remove it if we're allowed to make modifications.
433  if (AllowModify && !isPredicated(MBB.back()) &&
435  TBB && MBB.isLayoutSuccessor(TBB))
436  removeBranch(MBB);
437  return true;
438  }
439 
440  if (I == MBB.instr_begin())
441  return false;
442 
443  --I;
444  }
445 
446  // We made it past the terminators without bailing out - we must have
447  // analyzed this branch successfully.
448  return false;
449 }
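// For illustration of the analyzeBranch contract (hypothetical block): a block
// ending in
//   Bcc %bb.then, <cond>, $cpsr
//   B   %bb.else
// leaves TBB = %bb.then, FBB = %bb.else, Cond = { <cond>, $cpsr } and returns
// false (successfully analyzed).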
450 
452  int *BytesRemoved) const {
453  assert(!BytesRemoved && "code size not handled");
454 
456  if (I == MBB.end())
457  return 0;
458 
459  if (!isUncondBranchOpcode(I->getOpcode()) &&
460  !isCondBranchOpcode(I->getOpcode()))
461  return 0;
462 
463  // Remove the branch.
464  I->eraseFromParent();
465 
466  I = MBB.end();
467 
468  if (I == MBB.begin()) return 1;
469  --I;
470  if (!isCondBranchOpcode(I->getOpcode()))
471  return 1;
472 
473  // Remove the branch.
474  I->eraseFromParent();
475  return 2;
476 }
477 
479  MachineBasicBlock *TBB,
480  MachineBasicBlock *FBB,
482  const DebugLoc &DL,
483  int *BytesAdded) const {
484  assert(!BytesAdded && "code size not handled");
486  int BOpc = !AFI->isThumbFunction()
487  ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
488  int BccOpc = !AFI->isThumbFunction()
489  ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
490  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
491 
492  // Shouldn't be a fall through.
493  assert(TBB && "insertBranch must not be told to insert a fallthrough");
494  assert((Cond.size() == 2 || Cond.size() == 0) &&
495  "ARM branch conditions have two components!");
496 
497  // For conditional branches, we use addOperand to preserve CPSR flags.
498 
499  if (!FBB) {
500  if (Cond.empty()) { // Unconditional branch?
501  if (isThumb)
502  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
503  else
504  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
505  } else
506  BuildMI(&MBB, DL, get(BccOpc))
507  .addMBB(TBB)
508  .addImm(Cond[0].getImm())
509  .add(Cond[1]);
510  return 1;
511  }
512 
513  // Two-way conditional branch.
514  BuildMI(&MBB, DL, get(BccOpc))
515  .addMBB(TBB)
516  .addImm(Cond[0].getImm())
517  .add(Cond[1]);
518  if (isThumb)
519  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
520  else
521  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
522  return 2;
523 }
524 
527  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
528  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
529  return false;
530 }
531 
533  if (MI.isBundle()) {
535  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
536  while (++I != E && I->isInsideBundle()) {
537  int PIdx = I->findFirstPredOperandIdx();
538  if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
539  return true;
540  }
541  return false;
542  }
543 
544  int PIdx = MI.findFirstPredOperandIdx();
545  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
546 }
547 
549  const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
550  const TargetRegisterInfo *TRI) const {
551 
552  // First, let's see if there is a generic comment for this operand
553  std::string GenericComment =
555  if (!GenericComment.empty())
556  return GenericComment;
557 
558  // If not, check if we have an immediate operand.
559  if (Op.getType() != MachineOperand::MO_Immediate)
560  return std::string();
561 
562  // And print its corresponding condition code if the immediate is a
563  // predicate.
564  int FirstPredOp = MI.findFirstPredOperandIdx();
565  if (FirstPredOp != (int) OpIdx)
566  return std::string();
567 
568  std::string CC = "CC::";
569  CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
570  return CC;
571 }
572 
575  unsigned Opc = MI.getOpcode();
576  if (isUncondBranchOpcode(Opc)) {
577  MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
578  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
579  .addImm(Pred[0].getImm())
580  .addReg(Pred[1].getReg());
581  return true;
582  }
583 
584  int PIdx = MI.findFirstPredOperandIdx();
585  if (PIdx != -1) {
586  MachineOperand &PMO = MI.getOperand(PIdx);
587  PMO.setImm(Pred[0].getImm());
588  MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
589 
590  // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
591  // IT block. This affects how they are printed.
592  const MCInstrDesc &MCID = MI.getDesc();
594  assert(MCID.OpInfo[1].isOptionalDef() && "CPSR def isn't expected operand");
595  assert((MI.getOperand(1).isDead() ||
596  MI.getOperand(1).getReg() != ARM::CPSR) &&
597  "if conversion tried to stop defining used CPSR");
598  MI.getOperand(1).setReg(ARM::NoRegister);
599  }
600 
601  return true;
602  }
603  return false;
604 }
605 
607  ArrayRef<MachineOperand> Pred2) const {
608  if (Pred1.size() > 2 || Pred2.size() > 2)
609  return false;
610 
611  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
612  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
613  if (CC1 == CC2)
614  return true;
615 
616  switch (CC1) {
617  default:
618  return false;
619  case ARMCC::AL:
620  return true;
621  case ARMCC::HS:
622  return CC2 == ARMCC::HI;
623  case ARMCC::LS:
624  return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
625  case ARMCC::GE:
626  return CC2 == ARMCC::GT;
627  case ARMCC::LE:
628  return CC2 == ARMCC::LT;
629  }
630 }
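// For illustration of the subsumption relation above: HS (unsigned
// higher-or-same) subsumes HI (unsigned higher) because every state satisfying
// HI also satisfies HS, so an instruction predicated on HS covers all
// executions an HI-predicated one would.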
631 
633  std::vector<MachineOperand> &Pred,
634  bool SkipDead) const {
635  bool Found = false;
636  for (const MachineOperand &MO : MI.operands()) {
637  bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
638  bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
639  if (ClobbersCPSR || IsCPSR) {
640 
641  // Filter out T1 instructions that have a dead CPSR,
642  // allowing IT blocks to be generated containing T1 instructions
643  const MCInstrDesc &MCID = MI.getDesc();
644  if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
645  SkipDead)
646  continue;
647 
648  Pred.push_back(MO);
649  Found = true;
650  }
651  }
652 
653  return Found;
654 }
655 
657  for (const auto &MO : MI.operands())
658  if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
659  return true;
660  return false;
661 }
662 
663 static bool isEligibleForITBlock(const MachineInstr *MI) {
664  switch (MI->getOpcode()) {
665  default: return true;
666  case ARM::tADC: // ADC (register) T1
667  case ARM::tADDi3: // ADD (immediate) T1
668  case ARM::tADDi8: // ADD (immediate) T2
669  case ARM::tADDrr: // ADD (register) T1
670  case ARM::tAND: // AND (register) T1
671  case ARM::tASRri: // ASR (immediate) T1
672  case ARM::tASRrr: // ASR (register) T1
673  case ARM::tBIC: // BIC (register) T1
674  case ARM::tEOR: // EOR (register) T1
675  case ARM::tLSLri: // LSL (immediate) T1
676  case ARM::tLSLrr: // LSL (register) T1
677  case ARM::tLSRri: // LSR (immediate) T1
678  case ARM::tLSRrr: // LSR (register) T1
679  case ARM::tMUL: // MUL T1
680  case ARM::tMVN: // MVN (register) T1
681  case ARM::tORR: // ORR (register) T1
682  case ARM::tROR: // ROR (register) T1
683  case ARM::tRSB: // RSB (immediate) T1
684  case ARM::tSBC: // SBC (register) T1
685  case ARM::tSUBi3: // SUB (immediate) T1
686  case ARM::tSUBi8: // SUB (immediate) T2
687  case ARM::tSUBrr: // SUB (register) T1
689  }
690 }
691 
692 /// isPredicable - Return true if the specified instruction can be predicated.
693 /// By default, this returns true for every instruction with a
694 /// PredicateOperand.
696  if (!MI.isPredicable())
697  return false;
698 
699  if (MI.isBundle())
700  return false;
701 
702  if (!isEligibleForITBlock(&MI))
703  return false;
704 
705  const MachineFunction *MF = MI.getParent()->getParent();
706  const ARMFunctionInfo *AFI =
707  MF->getInfo<ARMFunctionInfo>();
708 
709  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
710  // In their ARM encoding, they can't be encoded in a conditional form.
711  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
712  return false;
713 
714  // Make indirect control flow changes unpredicable when SLS mitigation is
715  // enabled.
716  const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
717  if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
718  return false;
719  if (ST.hardenSlsBlr() && isIndirectCall(MI))
720  return false;
721 
722  if (AFI->isThumb2Function()) {
723  if (getSubtarget().restrictIT())
724  return isV8EligibleForIT(&MI);
725  }
726 
727  return true;
728 }
729 
730 namespace llvm {
731 
732 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
733  for (const MachineOperand &MO : MI->operands()) {
734  if (!MO.isReg() || MO.isUndef() || MO.isUse())
735  continue;
736  if (MO.getReg() != ARM::CPSR)
737  continue;
738  if (!MO.isDead())
739  return false;
740  }
741  // all definitions of CPSR are dead
742  return true;
743 }
744 
745 } // end namespace llvm
746 
747 /// GetInstSize - Return the size of the specified MachineInstr.
748 ///
750  const MachineBasicBlock &MBB = *MI.getParent();
751  const MachineFunction *MF = MBB.getParent();
752  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
753 
754  const MCInstrDesc &MCID = MI.getDesc();
755  if (MCID.getSize())
756  return MCID.getSize();
757 
758  switch (MI.getOpcode()) {
759  default:
760  // pseudo-instruction sizes are zero.
761  return 0;
762  case TargetOpcode::BUNDLE:
763  return getInstBundleLength(MI);
764  case ARM::MOVi16_ga_pcrel:
765  case ARM::MOVTi16_ga_pcrel:
766  case ARM::t2MOVi16_ga_pcrel:
767  case ARM::t2MOVTi16_ga_pcrel:
768  return 4;
769  case ARM::MOVi32imm:
770  case ARM::t2MOVi32imm:
771  return 8;
772  case ARM::CONSTPOOL_ENTRY:
773  case ARM::JUMPTABLE_INSTS:
774  case ARM::JUMPTABLE_ADDRS:
775  case ARM::JUMPTABLE_TBB:
776  case ARM::JUMPTABLE_TBH:
777  // If this machine instr is a constant pool entry, its size is recorded as
778  // operand #2.
779  return MI.getOperand(2).getImm();
780  case ARM::Int_eh_sjlj_longjmp:
781  return 16;
782  case ARM::tInt_eh_sjlj_longjmp:
783  return 10;
784  case ARM::tInt_WIN_eh_sjlj_longjmp:
785  return 12;
786  case ARM::Int_eh_sjlj_setjmp:
787  case ARM::Int_eh_sjlj_setjmp_nofp:
788  return 20;
789  case ARM::tInt_eh_sjlj_setjmp:
790  case ARM::t2Int_eh_sjlj_setjmp:
791  case ARM::t2Int_eh_sjlj_setjmp_nofp:
792  return 12;
793  case ARM::SPACE:
794  return MI.getOperand(1).getImm();
795  case ARM::INLINEASM:
796  case ARM::INLINEASM_BR: {
797  // If this machine instr is an inline asm, measure it.
798  unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
799  if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
800  Size = alignTo(Size, 4);
801  return Size;
802  }
803  case ARM::SpeculationBarrierISBDSBEndBB:
804  case ARM::t2SpeculationBarrierISBDSBEndBB:
805  // This gets lowered to 2 4-byte instructions.
806  return 8;
807  case ARM::SpeculationBarrierSBEndBB:
808  case ARM::t2SpeculationBarrierSBEndBB:
809  // This gets lowered to a single 4-byte instruction.
810  return 4;
811  }
812 }
813 
814 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
815  unsigned Size = 0;
817  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
818  while (++I != E && I->isInsideBundle()) {
819  assert(!I->isBundle() && "No nested bundle!");
821  }
822  return Size;
823 }
824 
827  unsigned DestReg, bool KillSrc,
828  const ARMSubtarget &Subtarget) const {
829  unsigned Opc = Subtarget.isThumb()
830  ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
831  : ARM::MRS;
832 
833  MachineInstrBuilder MIB =
834  BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
835 
836  // There is only 1 A/R class MRS instruction, and it always refers to
837  // APSR. However, there are lots of other possibilities on M-class cores.
838  if (Subtarget.isMClass())
839  MIB.addImm(0x800);
840 
841  MIB.add(predOps(ARMCC::AL))
842  .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
843 }
844 
847  unsigned SrcReg, bool KillSrc,
848  const ARMSubtarget &Subtarget) const {
849  unsigned Opc = Subtarget.isThumb()
850  ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
851  : ARM::MSR;
852 
853  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
854 
855  if (Subtarget.isMClass())
856  MIB.addImm(0x800);
857  else
858  MIB.addImm(8);
859 
860  MIB.addReg(SrcReg, getKillRegState(KillSrc))
863 }
864 
866  MIB.addImm(ARMVCC::None);
867  MIB.addReg(0);
868  MIB.addReg(0); // tp_reg
869 }
870 
872  Register DestReg) {
874  MIB.addReg(DestReg, RegState::Undef);
875 }
876 
878  MIB.addImm(Cond);
879  MIB.addReg(ARM::VPR, RegState::Implicit);
880  MIB.addReg(0); // tp_reg
881 }
882 
884  unsigned Cond, unsigned Inactive) {
886  MIB.addReg(Inactive);
887 }
888 
891  const DebugLoc &DL, MCRegister DestReg,
892  MCRegister SrcReg, bool KillSrc) const {
893  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
894  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
895 
896  if (GPRDest && GPRSrc) {
897  BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
898  .addReg(SrcReg, getKillRegState(KillSrc))
900  .add(condCodeOp());
901  return;
902  }
903 
904  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
905  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
906 
907  unsigned Opc = 0;
908  if (SPRDest && SPRSrc)
909  Opc = ARM::VMOVS;
910  else if (GPRDest && SPRSrc)
911  Opc = ARM::VMOVRS;
912  else if (SPRDest && GPRSrc)
913  Opc = ARM::VMOVSR;
914  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
915  Opc = ARM::VMOVD;
916  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
917  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
918 
919  if (Opc) {
920  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
921  MIB.addReg(SrcReg, getKillRegState(KillSrc));
922  if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
923  MIB.addReg(SrcReg, getKillRegState(KillSrc));
924  if (Opc == ARM::MVE_VORR)
925  addUnpredicatedMveVpredROp(MIB, DestReg);
926  else if (Opc != ARM::MQPRCopy)
927  MIB.add(predOps(ARMCC::AL));
928  return;
929  }
930 
931  // Handle register classes that require multiple instructions.
932  unsigned BeginIdx = 0;
933  unsigned SubRegs = 0;
934  int Spacing = 1;
935 
936  // Use VORRq when possible.
937  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
938  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
939  BeginIdx = ARM::qsub_0;
940  SubRegs = 2;
941  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
942  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
943  BeginIdx = ARM::qsub_0;
944  SubRegs = 4;
945  // Fall back to VMOVD.
946  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
947  Opc = ARM::VMOVD;
948  BeginIdx = ARM::dsub_0;
949  SubRegs = 2;
950  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
951  Opc = ARM::VMOVD;
952  BeginIdx = ARM::dsub_0;
953  SubRegs = 3;
954  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
955  Opc = ARM::VMOVD;
956  BeginIdx = ARM::dsub_0;
957  SubRegs = 4;
958  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
959  Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
960  BeginIdx = ARM::gsub_0;
961  SubRegs = 2;
962  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
963  Opc = ARM::VMOVD;
964  BeginIdx = ARM::dsub_0;
965  SubRegs = 2;
966  Spacing = 2;
967  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
968  Opc = ARM::VMOVD;
969  BeginIdx = ARM::dsub_0;
970  SubRegs = 3;
971  Spacing = 2;
972  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
973  Opc = ARM::VMOVD;
974  BeginIdx = ARM::dsub_0;
975  SubRegs = 4;
976  Spacing = 2;
977  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
978  !Subtarget.hasFP64()) {
979  Opc = ARM::VMOVS;
980  BeginIdx = ARM::ssub_0;
981  SubRegs = 2;
982  } else if (SrcReg == ARM::CPSR) {
983  copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
984  return;
985  } else if (DestReg == ARM::CPSR) {
986  copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
987  return;
988  } else if (DestReg == ARM::VPR) {
989  assert(ARM::GPRRegClass.contains(SrcReg));
990  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
991  .addReg(SrcReg, getKillRegState(KillSrc))
992  .add(predOps(ARMCC::AL));
993  return;
994  } else if (SrcReg == ARM::VPR) {
995  assert(ARM::GPRRegClass.contains(DestReg));
996  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
997  .addReg(SrcReg, getKillRegState(KillSrc))
998  .add(predOps(ARMCC::AL));
999  return;
1000  } else if (DestReg == ARM::FPSCR_NZCV) {
1001  assert(ARM::GPRRegClass.contains(SrcReg));
1002  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
1003  .addReg(SrcReg, getKillRegState(KillSrc))
1004  .add(predOps(ARMCC::AL));
1005  return;
1006  } else if (SrcReg == ARM::FPSCR_NZCV) {
1007  assert(ARM::GPRRegClass.contains(DestReg));
1008  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
1009  .addReg(SrcReg, getKillRegState(KillSrc))
1010  .add(predOps(ARMCC::AL));
1011  return;
1012  }
1013 
1014  assert(Opc && "Impossible reg-to-reg copy");
1015 
1016  const TargetRegisterInfo *TRI = &getRegisterInfo();
1017  MachineInstrBuilder Mov;
1018 
1019  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
1020  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
1021  BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
1022  Spacing = -Spacing;
1023  }
1024 #ifndef NDEBUG
1025  SmallSet<unsigned, 4> DstRegs;
1026 #endif
1027  for (unsigned i = 0; i != SubRegs; ++i) {
1028  Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
1029  Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
1030  assert(Dst && Src && "Bad sub-register");
1031 #ifndef NDEBUG
1032  assert(!DstRegs.count(Src) && "destructive vector copy");
1033  DstRegs.insert(Dst);
1034 #endif
1035  Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
1036  // VORR (NEON or MVE) takes two source operands.
1037  if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
1038  Mov.addReg(Src);
1039  }
1040  // MVE VORR takes predicate operands in place of an ordinary condition.
1041  if (Opc == ARM::MVE_VORR)
1042  addUnpredicatedMveVpredROp(Mov, Dst);
1043  else
1044  Mov = Mov.add(predOps(ARMCC::AL));
1045  // MOVr can set CC.
1046  if (Opc == ARM::MOVr)
1047  Mov = Mov.add(condCodeOp());
1048  }
1049  // Add implicit super-register defs and kills to the last instruction.
1050  Mov->addRegisterDefined(DestReg, TRI);
1051  if (KillSrc)
1052  Mov->addRegisterKilled(SrcReg, TRI);
1053 }
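// For illustration of the backward-copy case above (hypothetical registers):
// copying the D-pair d0_d1 into d1_d2 in forward order would emit
//   vmov.f64 d1, d0 ; vmov.f64 d2, d1
// clobbering d1 before it is read, so the BeginIdx/Spacing flip emits
//   vmov.f64 d2, d1 ; vmov.f64 d1, d0
// instead, preserving the source value.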
1054 
1057  // VMOVRRD is also a copy instruction, but it requires
1058  // special handling: it is a more complex form of copy, so
1059  // we do not consider it here. To recognize such
1060  // instructions, the isExtractSubregLike MI interface function
1061  // could be used.
1062  // VORRq is considered a move only if its two inputs are
1063  // the same register.
1064  if (!MI.isMoveReg() ||
1065  (MI.getOpcode() == ARM::VORRq &&
1066  MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
1067  return None;
1068  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1069 }
1070 
1073  Register Reg) const {
1074  if (auto DstSrcPair = isCopyInstrImpl(MI)) {
1075  Register DstReg = DstSrcPair->Destination->getReg();
1076 
1077  // TODO: We don't handle cases where the forwarding reg is narrower/wider
1078  // than the copy registers. Consider for example:
1079  //
1080  // s16 = VMOVS s0
1081  // s17 = VMOVS s1
1082  // call @callee(d0)
1083  //
1084  // We'd like to describe the call site value of d0 as d8, but this requires
1085  // gathering and merging the descriptions for the two VMOVS instructions.
1086  //
1087  // We also don't handle the reverse situation, where the forwarding reg is
1088  // narrower than the copy destination:
1089  //
1090  // d8 = VMOVD d0
1091  // call @callee(s1)
1092  //
1093  // We need to produce a fragment description (the call site value of s1 is
1094  // /not/ just d8).
1095  if (DstReg != Reg)
1096  return None;
1097  }
1099 }
1100 
1101 const MachineInstrBuilder &
1103  unsigned SubIdx, unsigned State,
1104  const TargetRegisterInfo *TRI) const {
1105  if (!SubIdx)
1106  return MIB.addReg(Reg, State);
1107 
1109  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1110  return MIB.addReg(Reg, State, SubIdx);
1111 }
1112 
1113 void ARMBaseInstrInfo::
1115  Register SrcReg, bool isKill, int FI,
1116  const TargetRegisterClass *RC,
1117  const TargetRegisterInfo *TRI) const {
1118  MachineFunction &MF = *MBB.getParent();
1119  MachineFrameInfo &MFI = MF.getFrameInfo();
1120  Align Alignment = MFI.getObjectAlign(FI);
1121 
1124  MFI.getObjectSize(FI), Alignment);
1125 
1126  switch (TRI->getSpillSize(*RC)) {
1127  case 2:
1128  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1129  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
1130  .addReg(SrcReg, getKillRegState(isKill))
1131  .addFrameIndex(FI)
1132  .addImm(0)
1133  .addMemOperand(MMO)
1134  .add(predOps(ARMCC::AL));
1135  } else
1136  llvm_unreachable("Unknown reg class!");
1137  break;
1138  case 4:
1139  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1140  BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1141  .addReg(SrcReg, getKillRegState(isKill))
1142  .addFrameIndex(FI)
1143  .addImm(0)
1144  .addMemOperand(MMO)
1145  .add(predOps(ARMCC::AL));
1146  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1147  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1148  .addReg(SrcReg, getKillRegState(isKill))
1149  .addFrameIndex(FI)
1150  .addImm(0)
1151  .addMemOperand(MMO)
1152  .add(predOps(ARMCC::AL));
1153  } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1154  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
1155  .addReg(SrcReg, getKillRegState(isKill))
1156  .addFrameIndex(FI)
1157  .addImm(0)
1158  .addMemOperand(MMO)
1159  .add(predOps(ARMCC::AL));
1160  } else
1161  llvm_unreachable("Unknown reg class!");
1162  break;
1163  case 8:
1164  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1165  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1166  .addReg(SrcReg, getKillRegState(isKill))
1167  .addFrameIndex(FI)
1168  .addImm(0)
1169  .addMemOperand(MMO)
1170  .add(predOps(ARMCC::AL));
1171  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1172  if (Subtarget.hasV5TEOps()) {
1174  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1175  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1176  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1177  .add(predOps(ARMCC::AL));
1178  } else {
1179  // Fallback to STM instruction, which has existed since the dawn of
1180  // time.
1181  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1182  .addFrameIndex(FI)
1183  .addMemOperand(MMO)
1184  .add(predOps(ARMCC::AL));
1185  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1186  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1187  }
1188  } else
1189  llvm_unreachable("Unknown reg class!");
1190  break;
1191  case 16:
1192  if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1193  // Use aligned spills if the stack can be realigned.
1194  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1195  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1196  .addFrameIndex(FI)
1197  .addImm(16)
1198  .addReg(SrcReg, getKillRegState(isKill))
1199  .addMemOperand(MMO)
1200  .add(predOps(ARMCC::AL));
1201  } else {
1202  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1203  .addReg(SrcReg, getKillRegState(isKill))
1204  .addFrameIndex(FI)
1205  .addMemOperand(MMO)
1206  .add(predOps(ARMCC::AL));
1207  }
1208  } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1209  Subtarget.hasMVEIntegerOps()) {
1210  auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1211  MIB.addReg(SrcReg, getKillRegState(isKill))
1212  .addFrameIndex(FI)
1213  .addImm(0)
1214  .addMemOperand(MMO);
1216  } else
1217  llvm_unreachable("Unknown reg class!");
1218  break;
1219  case 24:
1220  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1221  // Use aligned spills if the stack can be realigned.
1222  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1223  Subtarget.hasNEON()) {
1224  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1225  .addFrameIndex(FI)
1226  .addImm(16)
1227  .addReg(SrcReg, getKillRegState(isKill))
1228  .addMemOperand(MMO)
1229  .add(predOps(ARMCC::AL));
1230  } else {
1232  get(ARM::VSTMDIA))
1233  .addFrameIndex(FI)
1234  .add(predOps(ARMCC::AL))
1235  .addMemOperand(MMO);
1236  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1237  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1238  AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1239  }
1240  } else
1241  llvm_unreachable("Unknown reg class!");
1242  break;
1243  case 32:
1244  if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1245  ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1246  ARM::DQuadRegClass.hasSubClassEq(RC)) {
1247  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1248  Subtarget.hasNEON()) {
1249  // FIXME: It's possible to only store part of the QQ register if the
1250  // spilled def has a sub-register index.
1251  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1252  .addFrameIndex(FI)
1253  .addImm(16)
1254  .addReg(SrcReg, getKillRegState(isKill))
1255  .addMemOperand(MMO)
1256  .add(predOps(ARMCC::AL));
1257  } else if (Subtarget.hasMVEIntegerOps()) {
1258  BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1259  .addReg(SrcReg, getKillRegState(isKill))
1260  .addFrameIndex(FI)
1261  .addMemOperand(MMO);
1262  } else {
1264  get(ARM::VSTMDIA))
1265  .addFrameIndex(FI)
1266  .add(predOps(ARMCC::AL))
1267  .addMemOperand(MMO);
1268  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1269  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1270  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1271  AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1272  }
1273  } else
1274  llvm_unreachable("Unknown reg class!");
1275  break;
1276  case 64:
1277  if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1278  Subtarget.hasMVEIntegerOps()) {
1279  BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1280  .addReg(SrcReg, getKillRegState(isKill))
1281  .addFrameIndex(FI)
1282  .addMemOperand(MMO);
1283  } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1284  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1285  .addFrameIndex(FI)
1286  .add(predOps(ARMCC::AL))
1287  .addMemOperand(MMO);
1288  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1289  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1290  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1291  MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1292  MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1293  MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1294  MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1295  AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1296  } else
1297  llvm_unreachable("Unknown reg class!");
1298  break;
1299  default:
1300  llvm_unreachable("Unknown reg class!");
1301  }
1302 }
1303 
1305  int &FrameIndex) const {
1306  switch (MI.getOpcode()) {
1307  default: break;
1308  case ARM::STRrs:
1309  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1310  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1311  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1312  MI.getOperand(3).getImm() == 0) {
1313  FrameIndex = MI.getOperand(1).getIndex();
1314  return MI.getOperand(0).getReg();
1315  }
1316  break;
1317  case ARM::STRi12:
1318  case ARM::t2STRi12:
1319  case ARM::tSTRspi:
1320  case ARM::VSTRD:
1321  case ARM::VSTRS:
1322  case ARM::VSTR_P0_off:
1323  case ARM::MVE_VSTRWU32:
1324  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1325  MI.getOperand(2).getImm() == 0) {
1326  FrameIndex = MI.getOperand(1).getIndex();
1327  return MI.getOperand(0).getReg();
1328  }
1329  break;
1330  case ARM::VST1q64:
1331  case ARM::VST1d64TPseudo:
1332  case ARM::VST1d64QPseudo:
1333  if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1334  FrameIndex = MI.getOperand(0).getIndex();
1335  return MI.getOperand(2).getReg();
1336  }
1337  break;
1338  case ARM::VSTMQIA:
1339  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1340  FrameIndex = MI.getOperand(1).getIndex();
1341  return MI.getOperand(0).getReg();
1342  }
1343  break;
1344  case ARM::MQQPRStore:
1345  case ARM::MQQQQPRStore:
1346  if (MI.getOperand(1).isFI()) {
1347  FrameIndex = MI.getOperand(1).getIndex();
1348  return MI.getOperand(0).getReg();
1349  }
1350  break;
1351  }
1352 
1353  return 0;
1354 }
1355 
1357  int &FrameIndex) const {
1359  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1360  Accesses.size() == 1) {
1361  FrameIndex =
1362  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1363  ->getFrameIndex();
1364  return true;
1365  }
1366  return false;
1367 }
1368 
1369 void ARMBaseInstrInfo::
1371  Register DestReg, int FI,
1372  const TargetRegisterClass *RC,
1373  const TargetRegisterInfo *TRI) const {
1374  DebugLoc DL;
1375  if (I != MBB.end()) DL = I->getDebugLoc();
1376  MachineFunction &MF = *MBB.getParent();
1377  MachineFrameInfo &MFI = MF.getFrameInfo();
1378  const Align Alignment = MFI.getObjectAlign(FI);
1381  MFI.getObjectSize(FI), Alignment);
1382 
1383  switch (TRI->getSpillSize(*RC)) {
1384  case 2:
1385  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1386  BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1387  .addFrameIndex(FI)
1388  .addImm(0)
1389  .addMemOperand(MMO)
1390  .add(predOps(ARMCC::AL));
1391  } else
1392  llvm_unreachable("Unknown reg class!");
1393  break;
1394  case 4:
1395  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1396  BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1397  .addFrameIndex(FI)
1398  .addImm(0)
1399  .addMemOperand(MMO)
1400  .add(predOps(ARMCC::AL));
1401  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1402  BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1403  .addFrameIndex(FI)
1404  .addImm(0)
1405  .addMemOperand(MMO)
1406  .add(predOps(ARMCC::AL));
1407  } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1408  BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1409  .addFrameIndex(FI)
1410  .addImm(0)
1411  .addMemOperand(MMO)
1412  .add(predOps(ARMCC::AL));
1413  } else
1414  llvm_unreachable("Unknown reg class!");
1415  break;
1416  case 8:
1417  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1418  BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1419  .addFrameIndex(FI)
1420  .addImm(0)
1421  .addMemOperand(MMO)
1422  .add(predOps(ARMCC::AL));
1423  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1424  MachineInstrBuilder MIB;
1425 
1426  if (Subtarget.hasV5TEOps()) {
1427  MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1428  AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1429  AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1430  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1431  .add(predOps(ARMCC::AL));
1432  } else {
1433  // Fallback to LDM instruction, which has existed since the dawn of
1434  // time.
1435  MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1436  .addFrameIndex(FI)
1437  .addMemOperand(MMO)
1438  .add(predOps(ARMCC::AL));
1439  MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1440  MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1441  }
1442 
1443  if (Register::isPhysicalRegister(DestReg))
1444  MIB.addReg(DestReg, RegState::ImplicitDefine);
1445  } else
1446  llvm_unreachable("Unknown reg class!");
1447  break;
1448  case 16:
1449  if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1450  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1451  BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1452  .addFrameIndex(FI)
1453  .addImm(16)
1454  .addMemOperand(MMO)
1455  .add(predOps(ARMCC::AL));
1456  } else {
1457  BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1458  .addFrameIndex(FI)
1459  .addMemOperand(MMO)
1460  .add(predOps(ARMCC::AL));
1461  }
1462  } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1463  Subtarget.hasMVEIntegerOps()) {
1464  auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1465  MIB.addFrameIndex(FI)
1466  .addImm(0)
1467  .addMemOperand(MMO);
1469  } else
1470  llvm_unreachable("Unknown reg class!");
1471  break;
1472  case 24:
1473  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1474  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1475  Subtarget.hasNEON()) {
1476  BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1477  .addFrameIndex(FI)
1478  .addImm(16)
1479  .addMemOperand(MMO)
1480  .add(predOps(ARMCC::AL));
1481  } else {
1482  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1483  .addFrameIndex(FI)
1484  .addMemOperand(MMO)
1485  .add(predOps(ARMCC::AL));
1486  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1487  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1488  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1489  if (Register::isPhysicalRegister(DestReg))
1490  MIB.addReg(DestReg, RegState::ImplicitDefine);
1491  }
1492  } else
1493  llvm_unreachable("Unknown reg class!");
1494  break;
1495  case 32:
1496  if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1497  ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1498  ARM::DQuadRegClass.hasSubClassEq(RC)) {
1499  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1500  Subtarget.hasNEON()) {
1501  BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1502  .addFrameIndex(FI)
1503  .addImm(16)
1504  .addMemOperand(MMO)
1505  .add(predOps(ARMCC::AL));
1506  } else if (Subtarget.hasMVEIntegerOps()) {
1507  BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1508  .addFrameIndex(FI)
1509  .addMemOperand(MMO);
1510  } else {
1511  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1512  .addFrameIndex(FI)
1513  .add(predOps(ARMCC::AL))
1514  .addMemOperand(MMO);
1515  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1516  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1517  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1518  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1519  if (Register::isPhysicalRegister(DestReg))
1520  MIB.addReg(DestReg, RegState::ImplicitDefine);
1521  }
1522  } else
1523  llvm_unreachable("Unknown reg class!");
1524  break;
1525  case 64:
1526  if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1527  Subtarget.hasMVEIntegerOps()) {
1528  BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1529  .addFrameIndex(FI)
1530  .addMemOperand(MMO);
1531  } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1532  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1533  .addFrameIndex(FI)
1534  .add(predOps(ARMCC::AL))
1535  .addMemOperand(MMO);
1536  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1537  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1538  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1539  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1540  MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1541  MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1542  MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1543  MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1544  if (Register::isPhysicalRegister(DestReg))
1545  MIB.addReg(DestReg, RegState::ImplicitDefine);
1546  } else
1547  llvm_unreachable("Unknown reg class!");
1548  break;
1549  default:
1550  llvm_unreachable("Unknown regclass!");
1551  }
1552 }
1553 
1555  int &FrameIndex) const {
1556  switch (MI.getOpcode()) {
1557  default: break;
1558  case ARM::LDRrs:
1559  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1560  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1561  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1562  MI.getOperand(3).getImm() == 0) {
1563  FrameIndex = MI.getOperand(1).getIndex();
1564  return MI.getOperand(0).getReg();
1565  }
1566  break;
1567  case ARM::LDRi12:
1568  case ARM::t2LDRi12:
1569  case ARM::tLDRspi:
1570  case ARM::VLDRD:
1571  case ARM::VLDRS:
1572  case ARM::VLDR_P0_off:
1573  case ARM::MVE_VLDRWU32:
1574  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1575  MI.getOperand(2).getImm() == 0) {
1576  FrameIndex = MI.getOperand(1).getIndex();
1577  return MI.getOperand(0).getReg();
1578  }
1579  break;
1580  case ARM::VLD1q64:
1581  case ARM::VLD1d8TPseudo:
1582  case ARM::VLD1d16TPseudo:
1583  case ARM::VLD1d32TPseudo:
1584  case ARM::VLD1d64TPseudo:
1585  case ARM::VLD1d8QPseudo:
1586  case ARM::VLD1d16QPseudo:
1587  case ARM::VLD1d32QPseudo:
1588  case ARM::VLD1d64QPseudo:
1589  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1590  FrameIndex = MI.getOperand(1).getIndex();
1591  return MI.getOperand(0).getReg();
1592  }
1593  break;
1594  case ARM::VLDMQIA:
1595  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1596  FrameIndex = MI.getOperand(1).getIndex();
1597  return MI.getOperand(0).getReg();
1598  }
1599  break;
1600  case ARM::MQQPRLoad:
1601  case ARM::MQQQQPRLoad:
1602  if (MI.getOperand(1).isFI()) {
1603  FrameIndex = MI.getOperand(1).getIndex();
1604  return MI.getOperand(0).getReg();
1605  }
1606  break;
1607  }
1608 
1609  return 0;
1610 }
1611 
1613  int &FrameIndex) const {
1615  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1616  Accesses.size() == 1) {
1617  FrameIndex =
1618  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1619  ->getFrameIndex();
1620  return true;
1621  }
1622  return false;
1623 }
1624 
1625 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1626 /// depending on whether the result is used.
1627 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1628  bool isThumb1 = Subtarget.isThumb1Only();
1629  bool isThumb2 = Subtarget.isThumb2();
1630  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1631 
1632  DebugLoc dl = MI->getDebugLoc();
1633  MachineBasicBlock *BB = MI->getParent();
1634 
1635  MachineInstrBuilder LDM, STM;
1636  if (isThumb1 || !MI->getOperand(1).isDead()) {
1637  MachineOperand LDWb(MI->getOperand(1));
1638  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1639  : isThumb1 ? ARM::tLDMIA_UPD
1640  : ARM::LDMIA_UPD))
1641  .add(LDWb);
1642  } else {
1643  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1644  }
1645 
1646  if (isThumb1 || !MI->getOperand(0).isDead()) {
1647  MachineOperand STWb(MI->getOperand(0));
1648  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1649  : isThumb1 ? ARM::tSTMIA_UPD
1650  : ARM::STMIA_UPD))
1651  .add(STWb);
1652  } else {
1653  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1654  }
1655 
1656  MachineOperand LDBase(MI->getOperand(3));
1657  LDM.add(LDBase).add(predOps(ARMCC::AL));
1658 
1659  MachineOperand STBase(MI->getOperand(2));
1660  STM.add(STBase).add(predOps(ARMCC::AL));
1661 
1662  // Sort the scratch registers into ascending order.
1664  SmallVector<unsigned, 6> ScratchRegs;
1665  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
1666  ScratchRegs.push_back(MI->getOperand(I).getReg());
1667  llvm::sort(ScratchRegs,
1668  [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1669  return TRI.getEncodingValue(Reg1) <
1670  TRI.getEncodingValue(Reg2);
1671  });
1672 
1673  for (const auto &Reg : ScratchRegs) {
1674  LDM.addReg(Reg, RegState::Define);
1675  STM.addReg(Reg, RegState::Kill);
1676  }
1677 
1678  BB->erase(MI);
1679 }
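// For illustration (hypothetical register assignment): a four-word MEMCPY whose
// updated pointers remain live might expand to
//   ldmia r1!, {r4, r5, r6, r7}
//   stmia r0!, {r4, r5, r6, r7}
// whereas dead writeback operands select the plain LDMIA/STMIA forms instead.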
1680 
1682  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1683  expandLoadStackGuard(MI);
1684  MI.getParent()->erase(MI);
1685  return true;
1686  }
1687 
1688  if (MI.getOpcode() == ARM::MEMCPY) {
1689  expandMEMCPY(MI);
1690  return true;
1691  }
1692 
1693  // This hook gets to expand COPY instructions before they become
1694  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1695  // widened to VMOVD. We prefer the VMOVD when possible because it may be
1696  // changed into a VORR that can go down the NEON pipeline.
1697  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1698  return false;
1699 
1700  // Look for a copy between even S-registers. That is where we keep floats
1701  // when using NEON v2f32 instructions for f32 arithmetic.
1702  Register DstRegS = MI.getOperand(0).getReg();
1703  Register SrcRegS = MI.getOperand(1).getReg();
1704  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1705  return false;
1706 
1707  const TargetRegisterInfo *TRI = &getRegisterInfo();
1708  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1709  &ARM::DPRRegClass);
1710  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1711  &ARM::DPRRegClass);
1712  if (!DstRegD || !SrcRegD)
1713  return false;
1714 
1715  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1716  // legal if the COPY already defines the full DstRegD, and it isn't a
1717  // sub-register insertion.
1718  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1719  return false;
1720 
1721  // A dead copy shouldn't show up here, but reject it just in case.
1722  if (MI.getOperand(0).isDead())
1723  return false;
1724 
1725  // All clear, widen the COPY.
1726  LLVM_DEBUG(dbgs() << "widening: " << MI);
1727  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1728 
1729  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1730  // or some other super-register.
1731  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1732  if (ImpDefIdx != -1)
1733  MI.RemoveOperand(ImpDefIdx);
1734 
1735  // Change the opcode and operands.
1736  MI.setDesc(get(ARM::VMOVD));
1737  MI.getOperand(0).setReg(DstRegD);
1738  MI.getOperand(1).setReg(SrcRegD);
1739  MIB.add(predOps(ARMCC::AL));
1740 
1741  // We are now reading SrcRegD instead of SrcRegS. This may upset the
1742  // register scavenger and machine verifier, so we need to indicate that we
1743  // are reading an undefined value from SrcRegD, but a proper value from
1744  // SrcRegS.
1745  MI.getOperand(1).setIsUndef();
1746  MIB.addReg(SrcRegS, RegState::Implicit);
1747 
1748  // SrcRegD may actually contain an unrelated value in the ssub_1
1749  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1750  if (MI.getOperand(1).isKill()) {
1751  MI.getOperand(1).setIsKill(false);
1752  MI.addRegisterKilled(SrcRegS, TRI, true);
1753  }
1754 
1755  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1756  return true;
1757 }
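// For illustration of the widening above (hypothetical registers): an
// even-S-register copy
//   $s0 = COPY killed $s2
// becomes
//   $d0 = VMOVD undef $d1, implicit killed $s2
// so a later pass may turn the D-register move into a VORR that runs on the
// NEON pipeline.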
1758 
1759 /// Create a copy of a const pool value. Update CPI to the new index and return
1760 /// the label UID.
1761 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1764 
1765  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1766  assert(MCPE.isMachineConstantPoolEntry() &&
1767  "Expecting a machine constantpool entry!");
1768  ARMConstantPoolValue *ACPV =
1769  static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1770 
1771  unsigned PCLabelId = AFI->createPICLabelUId();
1772  ARMConstantPoolValue *NewCPV = nullptr;
1773 
1774  // FIXME: The below assumes PIC relocation model and that the function
1775  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1776  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1777  // instructions, so that's probably OK, but is PIC always correct when
1778  // we get here?
1779  if (ACPV->isGlobalValue())
1780  NewCPV = ARMConstantPoolConstant::Create(
1781  cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1782  4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1783  else if (ACPV->isExtSymbol())
1784  NewCPV = ARMConstantPoolSymbol::
1785  Create(MF.getFunction().getContext(),
1786  cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1787  else if (ACPV->isBlockAddress())
1788  NewCPV = ARMConstantPoolConstant::
1789  Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1790  ARMCP::CPBlockAddress, 4);
1791  else if (ACPV->isLSDA())
1792  NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1793  ARMCP::CPLSDA, 4);
1794  else if (ACPV->isMachineBasicBlock())
1795  NewCPV = ARMConstantPoolMBB::
1796  Create(MF.getFunction().getContext(),
1797  cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1798  else
1799  llvm_unreachable("Unexpected ARM constantpool value type!!");
1800  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1801  return PCLabelId;
1802 }
1803 
1804 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1805  MachineBasicBlock::iterator I,
1806  Register DestReg, unsigned SubIdx,
1807  const MachineInstr &Orig,
1808  const TargetRegisterInfo &TRI) const {
1809  unsigned Opcode = Orig.getOpcode();
1810  switch (Opcode) {
1811  default: {
1812  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1813  MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1814  MBB.insert(I, MI);
1815  break;
1816  }
1817  case ARM::tLDRpci_pic:
1818  case ARM::t2LDRpci_pic: {
1819  MachineFunction &MF = *MBB.getParent();
1820  unsigned CPI = Orig.getOperand(1).getIndex();
1821  unsigned PCLabelId = duplicateCPV(MF, CPI);
1822  BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1823  .addConstantPoolIndex(CPI)
1824  .addImm(PCLabelId)
1825  .cloneMemRefs(Orig);
1826  break;
1827  }
1828  }
1829 }
1830 
1831 MachineInstr &
1832 ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1833  MachineBasicBlock::iterator InsertBefore,
1834  const MachineInstr &Orig) const {
1835  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1836  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1837  for (;;) {
1838  switch (I->getOpcode()) {
1839  case ARM::tLDRpci_pic:
1840  case ARM::t2LDRpci_pic: {
1841  MachineFunction &MF = *MBB.getParent();
1842  unsigned CPI = I->getOperand(1).getIndex();
1843  unsigned PCLabelId = duplicateCPV(MF, CPI);
1844  I->getOperand(1).setIndex(CPI);
1845  I->getOperand(2).setImm(PCLabelId);
1846  break;
1847  }
1848  }
1849  if (!I->isBundledWithSucc())
1850  break;
1851  ++I;
1852  }
1853  return Cloned;
1854 }
1855 
1856 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1857  const MachineInstr &MI1,
1858  const MachineRegisterInfo *MRI) const {
1859  unsigned Opcode = MI0.getOpcode();
1860  if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1861  Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1862  Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1863  Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1864  Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1865  Opcode == ARM::t2MOV_ga_pcrel) {
1866  if (MI1.getOpcode() != Opcode)
1867  return false;
1868  if (MI0.getNumOperands() != MI1.getNumOperands())
1869  return false;
1870 
1871  const MachineOperand &MO0 = MI0.getOperand(1);
1872  const MachineOperand &MO1 = MI1.getOperand(1);
1873  if (MO0.getOffset() != MO1.getOffset())
1874  return false;
1875 
1876  if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1877  Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1878  Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1879  Opcode == ARM::t2MOV_ga_pcrel)
1880  // Ignore the PC labels.
1881  return MO0.getGlobal() == MO1.getGlobal();
1882 
1883  const MachineFunction *MF = MI0.getParent()->getParent();
1884  const MachineConstantPool *MCP = MF->getConstantPool();
1885  int CPI0 = MO0.getIndex();
1886  int CPI1 = MO1.getIndex();
1887  const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1888  const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1889  bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1890  bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1891  if (isARMCP0 && isARMCP1) {
1892  ARMConstantPoolValue *ACPV0 =
1893  static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1894  ARMConstantPoolValue *ACPV1 =
1895  static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1896  return ACPV0->hasSameValue(ACPV1);
1897  } else if (!isARMCP0 && !isARMCP1) {
1898  return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1899  }
1900  return false;
1901  } else if (Opcode == ARM::PICLDR) {
1902  if (MI1.getOpcode() != Opcode)
1903  return false;
1904  if (MI0.getNumOperands() != MI1.getNumOperands())
1905  return false;
1906 
1907  Register Addr0 = MI0.getOperand(1).getReg();
1908  Register Addr1 = MI1.getOperand(1).getReg();
1909  if (Addr0 != Addr1) {
1910  if (!MRI || !Register::isVirtualRegister(Addr0) ||
1911  !Register::isVirtualRegister(Addr1))
1912  return false;
1913 
1914  // This assumes SSA form.
1915  MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1916  MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1917  // Check if the loaded values, e.g. a constantpool entry or a global
1918  // address, are the same.
1919  if (!produceSameValue(*Def0, *Def1, MRI))
1920  return false;
1921  }
1922 
1923  for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1924  // %12 = PICLDR %11, 0, 14, %noreg
1925  const MachineOperand &MO0 = MI0.getOperand(i);
1926  const MachineOperand &MO1 = MI1.getOperand(i);
1927  if (!MO0.isIdenticalTo(MO1))
1928  return false;
1929  }
1930  return true;
1931  }
1932 
1933  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1934 }
1935 
1936 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1937 /// determine if two loads are loading from the same base address. It should
1938 /// only return true if the base pointers are the same and the only differences
1939 /// between the two addresses is the offset. It also returns the offsets by
1940 /// reference.
1941 ///
1942 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1943 /// is permanently disabled.
1944 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1945  int64_t &Offset1,
1946  int64_t &Offset2) const {
1947  // Don't worry about Thumb: just ARM and Thumb2.
1948  if (Subtarget.isThumb1Only()) return false;
1949 
1950  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1951  return false;
1952 
1953  switch (Load1->getMachineOpcode()) {
1954  default:
1955  return false;
1956  case ARM::LDRi12:
1957  case ARM::LDRBi12:
1958  case ARM::LDRD:
1959  case ARM::LDRH:
1960  case ARM::LDRSB:
1961  case ARM::LDRSH:
1962  case ARM::VLDRD:
1963  case ARM::VLDRS:
1964  case ARM::t2LDRi8:
1965  case ARM::t2LDRBi8:
1966  case ARM::t2LDRDi8:
1967  case ARM::t2LDRSHi8:
1968  case ARM::t2LDRi12:
1969  case ARM::t2LDRBi12:
1970  case ARM::t2LDRSHi12:
1971  break;
1972  }
1973 
1974  switch (Load2->getMachineOpcode()) {
1975  default:
1976  return false;
1977  case ARM::LDRi12:
1978  case ARM::LDRBi12:
1979  case ARM::LDRD:
1980  case ARM::LDRH:
1981  case ARM::LDRSB:
1982  case ARM::LDRSH:
1983  case ARM::VLDRD:
1984  case ARM::VLDRS:
1985  case ARM::t2LDRi8:
1986  case ARM::t2LDRBi8:
1987  case ARM::t2LDRSHi8:
1988  case ARM::t2LDRi12:
1989  case ARM::t2LDRBi12:
1990  case ARM::t2LDRSHi12:
1991  break;
1992  }
1993 
1994  // Check if base addresses and chain operands match.
1995  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1996  Load1->getOperand(4) != Load2->getOperand(4))
1997  return false;
1998 
1999  // Index should be Reg0.
2000  if (Load1->getOperand(3) != Load2->getOperand(3))
2001  return false;
2002 
2003  // Determine the offsets.
2004  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
2005  isa<ConstantSDNode>(Load2->getOperand(1))) {
2006  Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
2007  Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
2008  return true;
2009  }
2010 
2011  return false;
2012 }
2013 
2014 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
2015 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
2016 /// be scheduled together. On some targets if two loads are loading from
2017 /// addresses in the same cache line, it's better if they are scheduled
2018 /// together. This function takes two integers that represent the load offsets
2019 /// from the common base address. It returns true if it decides it's desirable
2020 /// to schedule the two loads together. "NumLoads" is the number of loads that
2021 /// have already been scheduled after Load1.
2022 ///
2023 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
2024 /// is permanently disabled.
2025 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
2026  int64_t Offset1, int64_t Offset2,
2027  unsigned NumLoads) const {
2028  // Don't worry about Thumb: just ARM and Thumb2.
2029  if (Subtarget.isThumb1Only()) return false;
2030 
2031  assert(Offset2 > Offset1);
2032 
2033  if ((Offset2 - Offset1) / 8 > 64)
2034  return false;
2035 
2036  // Check if the machine opcodes are different. If they are different
2037  // then we consider them to not be of the same base address,
2038  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
2039  // In this case, they are considered to be the same because they are different
2040  // encoding forms of the same basic instruction.
2041  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
2042  !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
2043  Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
2044  (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
2045  Load2->getMachineOpcode() == ARM::t2LDRBi8)))
2046  return false; // FIXME: overly conservative?
2047 
2048  // Four loads in a row should be sufficient.
2049  if (NumLoads >= 3)
2050  return false;
2051 
2052  return true;
2053 }
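// Worked example (illustrative): with the checks above, two LDRi12 loads at
// offsets 0 and 64 from the same base may still be clustered, since
// (64 - 0) / 8 = 8 <= 64 and fewer than four loads have been scheduled; a
// pair at offsets 0 and 4096 is rejected because (4096 - 0) / 8 = 512 > 64.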
2054 
2055 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2056  const MachineBasicBlock *MBB,
2057  const MachineFunction &MF) const {
2058  // Debug info is never a scheduling boundary. It's necessary to be explicit
2059  // due to the special treatment of IT instructions below, otherwise a
2060  // dbg_value followed by an IT will result in the IT instruction being
2061  // considered a scheduling hazard, which is wrong. It should be the actual
2062  // instruction preceding the dbg_value instruction(s), just like it is
2063  // when debug info is not present.
2064  if (MI.isDebugInstr())
2065  return false;
2066 
2067  // Terminators and labels can't be scheduled around.
2068  if (MI.isTerminator() || MI.isPosition())
2069  return true;
2070 
2071  // INLINEASM_BR can jump to another block
2072  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
2073  return true;
2074 
2075  // Treat the start of the IT block as a scheduling boundary, but schedule
2076  // t2IT along with all instructions following it.
2077  // FIXME: This is a big hammer. But the alternative is to add all potential
2078  // true and anti dependencies to IT block instructions as implicit operands
2079  // to the t2IT instruction. The added compile time and complexity does not
2080  // seem worth it.
2081  MachineBasicBlock::const_iterator I = MI;
2082  // Make sure to skip any debug instructions
2083  while (++I != MBB->end() && I->isDebugInstr())
2084  ;
2085  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
2086  return true;
2087 
2088  // Don't attempt to schedule around any instruction that defines
2089  // a stack-oriented pointer, as it's unlikely to be profitable. This
2090  // saves compile time, because it doesn't require every single
2091  // stack slot reference to depend on the instruction that does the
2092  // modification.
2093  // Calls don't actually change the stack pointer, even if they have imp-defs.
2094  // No ARM calling conventions change the stack pointer. (X86 calling
2095  // conventions sometimes do).
2096  if (!MI.isCall() && MI.definesRegister(ARM::SP))
2097  return true;
2098 
2099  return false;
2100 }
2101 
2102 bool ARMBaseInstrInfo::
2103 isProfitableToIfCvt(MachineBasicBlock &MBB,
2104  unsigned NumCycles, unsigned ExtraPredCycles,
2105  BranchProbability Probability) const {
2106  if (!NumCycles)
2107  return false;
2108 
2109  // If we are optimizing for size, see if the branch in the predecessor can be
2110  // lowered to cbn?z by the constant island lowering pass, and return false if
2111  // so. This results in a shorter instruction sequence.
2112  if (MBB.getParent()->getFunction().hasOptSize()) {
2113  MachineBasicBlock *Pred = *MBB.pred_begin();
2114  if (!Pred->empty()) {
2115  MachineInstr *LastMI = &*Pred->rbegin();
2116  if (LastMI->getOpcode() == ARM::t2Bcc) {
2117  const TargetRegisterInfo *TRI = &getRegisterInfo();
2118  MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
2119  if (CmpMI)
2120  return false;
2121  }
2122  }
2123  }
2124  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
2125  MBB, 0, 0, Probability);
2126 }
2127 
2128 bool ARMBaseInstrInfo::
2129 isProfitableToIfCvt(MachineBasicBlock &TBB,
2130  unsigned TCycles, unsigned TExtra,
2131  MachineBasicBlock &FBB,
2132  unsigned FCycles, unsigned FExtra,
2133  BranchProbability Probability) const {
2134  if (!TCycles)
2135  return false;
2136 
2137  // In Thumb code we often end up trading one branch for an IT block, and
2138  // if we are cloning, the duplicated instructions can increase code size.
2139  // Prevent blocks with multiple predecessors from being ifcvted to avoid
2140  // this cloning.
2141  if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
2142  if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
2143  return false;
2144  }
2145 
2146  // Attempt to estimate the relative costs of predication versus branching.
2147  // Here we scale up each component of UnpredCost to avoid precision issues when
2148  // scaling TCycles/FCycles by Probability.
2149  const unsigned ScalingUpFactor = 1024;
2150 
2151  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
2152  unsigned UnpredCost;
2153  if (!Subtarget.hasBranchPredictor()) {
2154  // When we don't have a branch predictor it's always cheaper to not take a
2155  // branch than take it, so we have to take that into account.
2156  unsigned NotTakenBranchCost = 1;
2157  unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2158  unsigned TUnpredCycles, FUnpredCycles;
2159  if (!FCycles) {
2160  // Triangle: TBB is the fallthrough
2161  TUnpredCycles = TCycles + NotTakenBranchCost;
2162  FUnpredCycles = TakenBranchCost;
2163  } else {
2164  // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2165  TUnpredCycles = TCycles + TakenBranchCost;
2166  FUnpredCycles = FCycles + NotTakenBranchCost;
2167  // The branch at the end of FBB will disappear when it's predicated, so
2168  // discount it from PredCost.
2169  PredCost -= 1 * ScalingUpFactor;
2170  }
2171  // The total cost is the cost of each path scaled by their probabilities
2172  unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2173  unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2174  UnpredCost = TUnpredCost + FUnpredCost;
2175  // When predicating assume that the first IT can be folded away but later
2176  // ones cost one cycle each
2177  if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2178  PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2179  }
2180  } else {
2181  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2182  unsigned FUnpredCost =
2183  Probability.getCompl().scale(FCycles * ScalingUpFactor);
2184  UnpredCost = TUnpredCost + FUnpredCost;
2185  UnpredCost += 1 * ScalingUpFactor; // The branch itself
2186  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2187  }
2188 
2189  return PredCost <= UnpredCost;
2190 }
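// Worked example (illustrative, assuming no branch predictor and a
// misprediction penalty of 4): for a diamond with TCycles = FCycles = 2, no
// extra predication cycles and a 50% branch probability,
//   PredCost   = (2 + 2) * 1024 - 1024                        = 3072
//   UnpredCost = 0.5 * (2 + 4) * 1024 + 0.5 * (2 + 1) * 1024  = 4608
// so predicating (if-converting) the diamond is judged profitable.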
2191 
2192 unsigned
2193 ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2194  unsigned NumInsts) const {
2195  // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2196  // ARM has a condition code field in every predicable instruction, using it
2197  // doesn't change code size.
2198  if (!Subtarget.isThumb2())
2199  return 0;
2200 
2201  // It's possible that the size of the IT is restricted to a single block.
2202  unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2203  return divideCeil(NumInsts, MaxInsts) * 2;
2204 }
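// Worked example (illustrative): predicating 5 Thumb2 instructions with
// unrestricted IT blocks costs divideCeil(5, 4) * 2 = 4 extra bytes; with
// restrictIT() (one instruction per IT block) the same 5 instructions cost
// divideCeil(5, 1) * 2 = 10 extra bytes.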
2205 
2206 unsigned
2207 ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2208  // If this branch is likely to be folded into the comparison to form a
2209  // CB(N)Z, then removing it won't reduce code size at all, because that will
2210  // just replace the CB(N)Z with a CMP.
2211  if (MI.getOpcode() == ARM::t2Bcc &&
2212  findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2213  return 0;
2214 
2215  unsigned Size = getInstSizeInBytes(MI);
2216 
2217  // For Thumb2, all branches are 32-bit instructions during the if conversion
2218  // pass, but may be replaced with 16-bit instructions during size reduction.
2219  // Since the branches considered by if conversion tend to be forward branches
2220  // over small basic blocks, they are very likely to be in range for the
2221  // narrow instructions, so we assume the final code size will be half what it
2222  // currently is.
2223  if (Subtarget.isThumb2())
2224  Size /= 2;
2225 
2226  return Size;
2227 }
2228 
2229 bool
2230 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2231  MachineBasicBlock &FMBB) const {
2232  // Reduce false anti-dependencies to let the target's out-of-order execution
2233  // engine do its thing.
2234  return Subtarget.isProfitableToUnpredicate();
2235 }
2236 
2237 /// getInstrPredicate - If the instruction is predicated, returns its predicate
2238 /// condition, otherwise returns AL. It also returns the condition code
2239 /// register by reference.
2240 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2241  Register &PredReg) {
2242  int PIdx = MI.findFirstPredOperandIdx();
2243  if (PIdx == -1) {
2244  PredReg = 0;
2245  return ARMCC::AL;
2246  }
2247 
2248  PredReg = MI.getOperand(PIdx+1).getReg();
2249  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2250 }
2251 
2252 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2253  if (Opc == ARM::B)
2254  return ARM::Bcc;
2255  if (Opc == ARM::tB)
2256  return ARM::tBcc;
2257  if (Opc == ARM::t2B)
2258  return ARM::t2Bcc;
2259 
2260  llvm_unreachable("Unknown unconditional branch opcode!");
2261 }
2262 
2263 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2264  bool NewMI,
2265  unsigned OpIdx1,
2266  unsigned OpIdx2) const {
2267  switch (MI.getOpcode()) {
2268  case ARM::MOVCCr:
2269  case ARM::t2MOVCCr: {
2270  // MOVCC can be commuted by inverting the condition.
2271  Register PredReg;
2272  ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2273  // MOVCC AL can't be inverted. Shouldn't happen.
2274  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2275  return nullptr;
2276  MachineInstr *CommutedMI =
2277  TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2278  if (!CommutedMI)
2279  return nullptr;
2280  // After swapping the MOVCC operands, also invert the condition.
2281  CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2282  .setImm(ARMCC::getOppositeCondition(CC));
2283  return CommutedMI;
2284  }
2285  }
2286  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2287 }
2288 
2289 /// Identify instructions that can be folded into a MOVCC instruction, and
2290 /// return the defining instruction.
2291 MachineInstr *
2292 ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2293  const TargetInstrInfo *TII) const {
2294  if (!Reg.isVirtual())
2295  return nullptr;
2296  if (!MRI.hasOneNonDBGUse(Reg))
2297  return nullptr;
2298  MachineInstr *MI = MRI.getVRegDef(Reg);
2299  if (!MI)
2300  return nullptr;
2301  // Check if MI can be predicated and folded into the MOVCC.
2302  if (!isPredicable(*MI))
2303  return nullptr;
2304  // Check if MI has any non-dead defs or physreg uses. This also detects
2305  // predicated instructions which will be reading CPSR.
2306  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2307  // Reject frame index operands, PEI can't handle the predicated pseudos.
2308  if (MO.isFI() || MO.isCPI() || MO.isJTI())
2309  return nullptr;
2310  if (!MO.isReg())
2311  continue;
2312  // MI can't have any tied operands, that would conflict with predication.
2313  if (MO.isTied())
2314  return nullptr;
2315  if (Register::isPhysicalRegister(MO.getReg()))
2316  return nullptr;
2317  if (MO.isDef() && !MO.isDead())
2318  return nullptr;
2319  }
2320  bool DontMoveAcrossStores = true;
2321  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2322  return nullptr;
2323  return MI;
2324 }
2325 
2326 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2327  SmallVectorImpl<MachineOperand> &Cond,
2328  unsigned &TrueOp, unsigned &FalseOp,
2329  bool &Optimizable) const {
2330  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2331  "Unknown select instruction");
2332  // MOVCC operands:
2333  // 0: Def.
2334  // 1: True use.
2335  // 2: False use.
2336  // 3: Condition code.
2337  // 4: CPSR use.
2338  TrueOp = 1;
2339  FalseOp = 2;
2340  Cond.push_back(MI.getOperand(3));
2341  Cond.push_back(MI.getOperand(4));
2342  // We can always fold a def.
2343  Optimizable = true;
2344  return false;
2345 }
2346 
2347 MachineInstr *
2348 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2349  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2350  bool PreferFalse) const {
2351  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2352  "Unknown select instruction");
2353  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2354  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2355  bool Invert = !DefMI;
2356  if (!DefMI)
2357  DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2358  if (!DefMI)
2359  return nullptr;
2360 
2361  // Find new register class to use.
2362  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2363  MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2364  Register DestReg = MI.getOperand(0).getReg();
2365  const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2366  const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2367  if (!MRI.constrainRegClass(DestReg, FalseClass))
2368  return nullptr;
2369  if (!MRI.constrainRegClass(DestReg, TrueClass))
2370  return nullptr;
2371 
2372  // Create a new predicated version of DefMI.
2373  // Rfalse is the first use.
2374  MachineInstrBuilder NewMI =
2375  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2376 
2377  // Copy all the DefMI operands, excluding its (null) predicate.
2378  const MCInstrDesc &DefDesc = DefMI->getDesc();
2379  for (unsigned i = 1, e = DefDesc.getNumOperands();
2380  i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
2381  NewMI.add(DefMI->getOperand(i));
2382 
2383  unsigned CondCode = MI.getOperand(3).getImm();
2384  if (Invert)
2385  NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2386  else
2387  NewMI.addImm(CondCode);
2388  NewMI.add(MI.getOperand(4));
2389 
2390  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2391  if (NewMI->hasOptionalDef())
2392  NewMI.add(condCodeOp());
2393 
2394  // The output register value when the predicate is false is an implicit
2395  // register operand tied to the first def.
2396  // The tie makes the register allocator ensure the FalseReg is allocated the
2397  // same register as operand 0.
2398  FalseReg.setImplicit();
2399  NewMI.add(FalseReg);
2400  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2401 
2402  // Update SeenMIs set: register newly created MI and erase removed DefMI.
2403  SeenMIs.insert(NewMI);
2404  SeenMIs.erase(DefMI);
2405 
2406  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2407  // DefMI would be invalid when transferred inside the loop. Checking for a
2408  // loop is expensive, but at least remove kill flags if they are in different
2409  // BBs.
2410  if (DefMI->getParent() != MI.getParent())
2411  NewMI->clearKillInfo();
2412 
2413  // The caller will erase MI, but not DefMI.
2414  DefMI->eraseFromParent();
2415  return NewMI;
2416 }
2417 
2418 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2419 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2420 /// def operand.
2421 ///
2422 /// This will go away once we can teach tblgen how to set the optional CPSR def
2423 /// operand itself.
2424 struct AddSubFlagsOpcodePair {
2425  uint16_t PseudoOpc;
2426  uint16_t MachineOpc;
2427 };
2428 
2429 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2430  {ARM::ADDSri, ARM::ADDri},
2431  {ARM::ADDSrr, ARM::ADDrr},
2432  {ARM::ADDSrsi, ARM::ADDrsi},
2433  {ARM::ADDSrsr, ARM::ADDrsr},
2434 
2435  {ARM::SUBSri, ARM::SUBri},
2436  {ARM::SUBSrr, ARM::SUBrr},
2437  {ARM::SUBSrsi, ARM::SUBrsi},
2438  {ARM::SUBSrsr, ARM::SUBrsr},
2439 
2440  {ARM::RSBSri, ARM::RSBri},
2441  {ARM::RSBSrsi, ARM::RSBrsi},
2442  {ARM::RSBSrsr, ARM::RSBrsr},
2443 
2444  {ARM::tADDSi3, ARM::tADDi3},
2445  {ARM::tADDSi8, ARM::tADDi8},
2446  {ARM::tADDSrr, ARM::tADDrr},
2447  {ARM::tADCS, ARM::tADC},
2448 
2449  {ARM::tSUBSi3, ARM::tSUBi3},
2450  {ARM::tSUBSi8, ARM::tSUBi8},
2451  {ARM::tSUBSrr, ARM::tSUBrr},
2452  {ARM::tSBCS, ARM::tSBC},
2453  {ARM::tRSBS, ARM::tRSB},
2454  {ARM::tLSLSri, ARM::tLSLri},
2455 
2456  {ARM::t2ADDSri, ARM::t2ADDri},
2457  {ARM::t2ADDSrr, ARM::t2ADDrr},
2458  {ARM::t2ADDSrs, ARM::t2ADDrs},
2459 
2460  {ARM::t2SUBSri, ARM::t2SUBri},
2461  {ARM::t2SUBSrr, ARM::t2SUBrr},
2462  {ARM::t2SUBSrs, ARM::t2SUBrs},
2463 
2464  {ARM::t2RSBSri, ARM::t2RSBri},
2465  {ARM::t2RSBSrs, ARM::t2RSBrs},
2466 };
2467 
2468 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2469  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
2470  if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
2471  return AddSubFlagsOpcodeMap[i].MachineOpc;
2472  return 0;
2473 }
2474 
2475 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2476  MachineBasicBlock::iterator &MBBI,
2477  const DebugLoc &dl, Register DestReg,
2478  Register BaseReg, int NumBytes,
2479  ARMCC::CondCodes Pred, Register PredReg,
2480  const ARMBaseInstrInfo &TII,
2481  unsigned MIFlags) {
2482  if (NumBytes == 0 && DestReg != BaseReg) {
2483  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2484  .addReg(BaseReg, RegState::Kill)
2485  .add(predOps(Pred, PredReg))
2486  .add(condCodeOp())
2487  .setMIFlags(MIFlags);
2488  return;
2489  }
2490 
2491  bool isSub = NumBytes < 0;
2492  if (isSub) NumBytes = -NumBytes;
2493 
2494  while (NumBytes) {
2495  unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2496  unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2497  assert(ThisVal && "Didn't extract field correctly");
2498 
2499  // We will handle these bits from offset, clear them.
2500  NumBytes &= ~ThisVal;
2501 
2502  assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2503 
2504  // Build the new ADD / SUB.
2505  unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2506  BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2507  .addReg(BaseReg, RegState::Kill)
2508  .addImm(ThisVal)
2509  .add(predOps(Pred, PredReg))
2510  .add(condCodeOp())
2511  .setMIFlags(MIFlags);
2512  BaseReg = DestReg;
2513  }
2514 }
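// Worked example (illustrative): NumBytes = 4097 (0x1001) is not a valid ARM
// modified immediate, so the loop above emits two instructions, roughly
//   ADDri DestReg, BaseReg, #1
//   ADDri DestReg, DestReg, #4096
// each chunk being an 8-bit value rotated right by an even amount.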
2515 
2516 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2517  MachineFunction &MF, MachineInstr *MI,
2518  unsigned NumBytes) {
2519  // This optimisation potentially adds lots of load and store
2520  // micro-operations, so it's really only a benefit to code size.
2521  if (!Subtarget.hasMinSize())
2522  return false;
2523 
2524  // If only one register is pushed/popped, LLVM can use an LDR/STR
2525  // instead. We can't modify those so make sure we're dealing with an
2526  // instruction we understand.
2527  bool IsPop = isPopOpcode(MI->getOpcode());
2528  bool IsPush = isPushOpcode(MI->getOpcode());
2529  if (!IsPush && !IsPop)
2530  return false;
2531 
2532  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2533  MI->getOpcode() == ARM::VLDMDIA_UPD;
2534  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2535  MI->getOpcode() == ARM::tPOP ||
2536  MI->getOpcode() == ARM::tPOP_RET;
2537 
2538  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2539  MI->getOperand(1).getReg() == ARM::SP)) &&
2540  "trying to fold sp update into non-sp-updating push/pop");
2541 
2542  // The VFP push & pop act on D-registers, so we can only correctly fold an
2543  // adjustment that is a multiple of 8 bytes. Similarly, GPRs are 4 bytes. Don't try
2544  // if this is violated.
2545  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2546  return false;
2547 
2548  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2549  // pred) so the list starts at 4. Thumb1 starts after the predicate.
2550  int RegListIdx = IsT1PushPop ? 2 : 4;
2551 
2552  // Calculate the space we'll need in terms of registers.
2553  unsigned RegsNeeded;
2554  const TargetRegisterClass *RegClass;
2555  if (IsVFPPushPop) {
2556  RegsNeeded = NumBytes / 8;
2557  RegClass = &ARM::DPRRegClass;
2558  } else {
2559  RegsNeeded = NumBytes / 4;
2560  RegClass = &ARM::GPRRegClass;
2561  }
2562 
2563  // We're going to have to strip all list operands off before
2564  // re-adding them since the order matters, so save the existing ones
2565  // for later.
2566  SmallVector<MachineOperand, 4> RegList;
2567 
2568  // We're also going to need the first register transferred by this
2569  // instruction, which won't necessarily be the first register in the list.
2570  unsigned FirstRegEnc = -1;
2571 
2573  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2574  MachineOperand &MO = MI->getOperand(i);
2575  RegList.push_back(MO);
2576 
2577  if (MO.isReg() && !MO.isImplicit() &&
2578  TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2579  FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2580  }
2581 
2582  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2583 
2584  // Now try to find enough space in the reglist to allocate NumBytes.
2585  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2586  --CurRegEnc) {
2587  unsigned CurReg = RegClass->getRegister(CurRegEnc);
2588  if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2589  continue;
2590  if (!IsPop) {
2591  // Pushing any register is completely harmless; mark the register involved
2592  // as undef since we don't care about its value and must not restore it
2593  // during stack unwinding.
2594  RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2595  false, false, true));
2596  --RegsNeeded;
2597  continue;
2598  }
2599 
2600  // However, we can only pop an extra register if it's not live. For
2601  // registers live within the function we might clobber a return value
2602  // register; the other way a register can be live here is if it's
2603  // callee-saved.
2604  if (isCalleeSavedRegister(CurReg, CSRegs) ||
2605  MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2606  MachineBasicBlock::LQR_Dead) {
2607  // VFP pops don't allow holes in the register list, so any skip is fatal
2608  // for our transformation. GPR pops do, so we should just keep looking.
2609  if (IsVFPPushPop)
2610  return false;
2611  else
2612  continue;
2613  }
2614 
2615  // Mark the unimportant registers as <def,dead> in the POP.
2616  RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2617  true));
2618  --RegsNeeded;
2619  }
2620 
2621  if (RegsNeeded > 0)
2622  return false;
2623 
2624  // Finally we know we can profitably perform the optimisation so go
2625  // ahead: strip all existing registers off and add them back again
2626  // in the right order.
2627  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2628  MI->RemoveOperand(i);
2629 
2630  // Add the complete list back in.
2631  MachineInstrBuilder MIB(MF, &*MI);
2632  for (const MachineOperand &MO : llvm::reverse(RegList))
2633  MIB.add(MO);
2634 
2635  return true;
2636 }
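// Worked example (illustrative): at minsize, a Thumb1 prologue such as
//   tPUSH {r4, lr}
//   sub   sp, sp, #8
// can be folded into tPUSH {r2, r3, r4, lr}, where r2 and r3 are added only
// as undef operands to reserve the extra 8 bytes; the matching pop can
// likewise restore into dead registers instead of a separate SP adjustment.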
2637 
2638 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2639  Register FrameReg, int &Offset,
2640  const ARMBaseInstrInfo &TII) {
2641  unsigned Opcode = MI.getOpcode();
2642  const MCInstrDesc &Desc = MI.getDesc();
2643  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2644  bool isSub = false;
2645 
2646  // Memory operands in inline assembly always use AddrMode2.
2647  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2648  AddrMode = ARMII::AddrMode2;
2649 
2650  if (Opcode == ARM::ADDri) {
2651  Offset += MI.getOperand(FrameRegIdx+1).getImm();
2652  if (Offset == 0) {
2653  // Turn it into a move.
2654  MI.setDesc(TII.get(ARM::MOVr));
2655  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2656  MI.RemoveOperand(FrameRegIdx+1);
2657  Offset = 0;
2658  return true;
2659  } else if (Offset < 0) {
2660  Offset = -Offset;
2661  isSub = true;
2662  MI.setDesc(TII.get(ARM::SUBri));
2663  }
2664 
2665  // Common case: small offset, fits into instruction.
2666  if (ARM_AM::getSOImmVal(Offset) != -1) {
2667  // Replace the FrameIndex with sp / fp
2668  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2669  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2670  Offset = 0;
2671  return true;
2672  }
2673 
2674  // Otherwise, pull as much of the immediate into this ADDri/SUBri
2675  // as possible.
2676  unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2677  unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2678 
2679  // We will handle these bits from offset, clear them.
2680  Offset &= ~ThisImmVal;
2681 
2682  // Get the properly encoded SOImmVal field.
2683  assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2684  "Bit extraction didn't work?");
2685  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2686  } else {
2687  unsigned ImmIdx = 0;
2688  int InstrOffs = 0;
2689  unsigned NumBits = 0;
2690  unsigned Scale = 1;
2691  switch (AddrMode) {
2692  case ARMII::AddrMode_i12:
2693  ImmIdx = FrameRegIdx + 1;
2694  InstrOffs = MI.getOperand(ImmIdx).getImm();
2695  NumBits = 12;
2696  break;
2697  case ARMII::AddrMode2:
2698  ImmIdx = FrameRegIdx+2;
2699  InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2700  if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2701  InstrOffs *= -1;
2702  NumBits = 12;
2703  break;
2704  case ARMII::AddrMode3:
2705  ImmIdx = FrameRegIdx+2;
2706  InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2707  if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2708  InstrOffs *= -1;
2709  NumBits = 8;
2710  break;
2711  case ARMII::AddrMode4:
2712  case ARMII::AddrMode6:
2713  // Can't fold any offset even if it's zero.
2714  return false;
2715  case ARMII::AddrMode5:
2716  ImmIdx = FrameRegIdx+1;
2717  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2718  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2719  InstrOffs *= -1;
2720  NumBits = 8;
2721  Scale = 4;
2722  break;
2723  case ARMII::AddrMode5FP16:
2724  ImmIdx = FrameRegIdx+1;
2725  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2726  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2727  InstrOffs *= -1;
2728  NumBits = 8;
2729  Scale = 2;
2730  break;
2731  case ARMII::AddrModeT2_i7:
2732  case ARMII::AddrModeT2_i7s2:
2733  case ARMII::AddrModeT2_i7s4:
2734  ImmIdx = FrameRegIdx+1;
2735  InstrOffs = MI.getOperand(ImmIdx).getImm();
2736  NumBits = 7;
2737  Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2738  AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2739  break;
2740  default:
2741  llvm_unreachable("Unsupported addressing mode!");
2742  }
2743 
2744  Offset += InstrOffs * Scale;
2745  assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2746  if (Offset < 0) {
2747  Offset = -Offset;
2748  isSub = true;
2749  }
2750 
2751  // Attempt to fold address comp. if opcode has offset bits
2752  if (NumBits > 0) {
2753  // Common case: small offset, fits into instruction.
2754  MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2755  int ImmedOffset = Offset / Scale;
2756  unsigned Mask = (1 << NumBits) - 1;
2757  if ((unsigned)Offset <= Mask * Scale) {
2758  // Replace the FrameIndex with sp
2759  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2760  // FIXME: When addrmode2 goes away, this will simplify (like the
2761  // T2 version), as the LDR.i12 versions don't need the encoding
2762  // tricks for the offset value.
2763  if (isSub) {
2764  if (AddrMode == ARMII::AddrMode_i12)
2765  ImmedOffset = -ImmedOffset;
2766  else
2767  ImmedOffset |= 1 << NumBits;
2768  }
2769  ImmOp.ChangeToImmediate(ImmedOffset);
2770  Offset = 0;
2771  return true;
2772  }
2773 
2774  // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2775  ImmedOffset = ImmedOffset & Mask;
2776  if (isSub) {
2777  if (AddrMode == ARMII::AddrMode_i12)
2778  ImmedOffset = -ImmedOffset;
2779  else
2780  ImmedOffset |= 1 << NumBits;
2781  }
2782  ImmOp.ChangeToImmediate(ImmedOffset);
2783  Offset &= ~(Mask*Scale);
2784  }
2785  }
2786 
2787  Offset = (isSub) ? -Offset : Offset;
2788  return Offset == 0;
2789 }
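// Worked example (illustrative): for an LDRi12 (AddrMode_i12: 12 offset bits,
// scale 1) addressing a frame object at FrameReg + 5000, the code above can
// only fold 5000 & 4095 = 904 into the instruction; the remaining 4096 bytes
// are left in Offset for the caller to materialise into a register.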
2790 
2791 /// analyzeCompare - For a comparison instruction, return the source registers
2792 /// in SrcReg and SrcReg2 if having two register operands, and the value it
2793 /// compares against in CmpValue. Return true if the comparison instruction
2794 /// can be analyzed.
2795 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2796  Register &SrcReg2, int64_t &CmpMask,
2797  int64_t &CmpValue) const {
2798  switch (MI.getOpcode()) {
2799  default: break;
2800  case ARM::CMPri:
2801  case ARM::t2CMPri:
2802  case ARM::tCMPi8:
2803  SrcReg = MI.getOperand(0).getReg();
2804  SrcReg2 = 0;
2805  CmpMask = ~0;
2806  CmpValue = MI.getOperand(1).getImm();
2807  return true;
2808  case ARM::CMPrr:
2809  case ARM::t2CMPrr:
2810  case ARM::tCMPr:
2811  SrcReg = MI.getOperand(0).getReg();
2812  SrcReg2 = MI.getOperand(1).getReg();
2813  CmpMask = ~0;
2814  CmpValue = 0;
2815  return true;
2816  case ARM::TSTri:
2817  case ARM::t2TSTri:
2818  SrcReg = MI.getOperand(0).getReg();
2819  SrcReg2 = 0;
2820  CmpMask = MI.getOperand(1).getImm();
2821  CmpValue = 0;
2822  return true;
2823  }
2824 
2825  return false;
2826 }
2827 
2828 /// isSuitableForMask - Identify a suitable 'and' instruction that
2829 /// operates on the given source register and applies the same mask
2830 /// as a 'tst' instruction. Provide a limited look-through for copies.
2831 /// When successful, MI will hold the found instruction.
2832 static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2833  int CmpMask, bool CommonUse) {
2834  switch (MI->getOpcode()) {
2835  case ARM::ANDri:
2836  case ARM::t2ANDri:
2837  if (CmpMask != MI->getOperand(2).getImm())
2838  return false;
2839  if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2840  return true;
2841  break;
2842  }
2843 
2844  return false;
2845 }
2846 
2847 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2848 /// the condition code if we modify the instructions such that flags are
2849 /// set by ADD(a,b,X).
2850 inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2851  switch (CC) {
2852  default: return ARMCC::AL;
2853  case ARMCC::HS: return ARMCC::LO;
2854  case ARMCC::LO: return ARMCC::HS;
2855  case ARMCC::VS: return ARMCC::VS;
2856  case ARMCC::VC: return ARMCC::VC;
2857  }
2858 }
2859 
2860 /// isRedundantFlagInstr - check whether the first instruction, whose only
2861 /// purpose is to update flags, can be made redundant.
2862 /// CMPrr can be made redundant by SUBrr if the operands are the same.
2863 /// CMPri can be made redundant by SUBri if the operands are the same.
2864 /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2865 /// This function can be extended later on.
2866 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2867  Register SrcReg, Register SrcReg2,
2868  int64_t ImmValue,
2869  const MachineInstr *OI,
2870  bool &IsThumb1) {
2871  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2872  (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2873  ((OI->getOperand(1).getReg() == SrcReg &&
2874  OI->getOperand(2).getReg() == SrcReg2) ||
2875  (OI->getOperand(1).getReg() == SrcReg2 &&
2876  OI->getOperand(2).getReg() == SrcReg))) {
2877  IsThumb1 = false;
2878  return true;
2879  }
2880 
2881  if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2882  ((OI->getOperand(2).getReg() == SrcReg &&
2883  OI->getOperand(3).getReg() == SrcReg2) ||
2884  (OI->getOperand(2).getReg() == SrcReg2 &&
2885  OI->getOperand(3).getReg() == SrcReg))) {
2886  IsThumb1 = true;
2887  return true;
2888  }
2889 
2890  if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2891  (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2892  OI->getOperand(1).getReg() == SrcReg &&
2893  OI->getOperand(2).getImm() == ImmValue) {
2894  IsThumb1 = false;
2895  return true;
2896  }
2897 
2898  if (CmpI->getOpcode() == ARM::tCMPi8 &&
2899  (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2900  OI->getOperand(2).getReg() == SrcReg &&
2901  OI->getOperand(3).getImm() == ImmValue) {
2902  IsThumb1 = true;
2903  return true;
2904  }
2905 
2906  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2907  (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2908  OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2909  OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2910  OI->getOperand(0).getReg() == SrcReg &&
2911  OI->getOperand(1).getReg() == SrcReg2) {
2912  IsThumb1 = false;
2913  return true;
2914  }
2915 
2916  if (CmpI->getOpcode() == ARM::tCMPr &&
2917  (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2918  OI->getOpcode() == ARM::tADDrr) &&
2919  OI->getOperand(0).getReg() == SrcReg &&
2920  OI->getOperand(2).getReg() == SrcReg2) {
2921  IsThumb1 = true;
2922  return true;
2923  }
2924 
2925  return false;
2926 }
2927 
2928 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2929  switch (MI->getOpcode()) {
2930  default: return false;
2931  case ARM::tLSLri:
2932  case ARM::tLSRri:
2933  case ARM::tLSLrr:
2934  case ARM::tLSRrr:
2935  case ARM::tSUBrr:
2936  case ARM::tADDrr:
2937  case ARM::tADDi3:
2938  case ARM::tADDi8:
2939  case ARM::tSUBi3:
2940  case ARM::tSUBi8:
2941  case ARM::tMUL:
2942  case ARM::tADC:
2943  case ARM::tSBC:
2944  case ARM::tRSB:
2945  case ARM::tAND:
2946  case ARM::tORR:
2947  case ARM::tEOR:
2948  case ARM::tBIC:
2949  case ARM::tMVN:
2950  case ARM::tASRri:
2951  case ARM::tASRrr:
2952  case ARM::tROR:
2953  IsThumb1 = true;
2954  LLVM_FALLTHROUGH;
2955  case ARM::RSBrr:
2956  case ARM::RSBri:
2957  case ARM::RSCrr:
2958  case ARM::RSCri:
2959  case ARM::ADDrr:
2960  case ARM::ADDri:
2961  case ARM::ADCrr:
2962  case ARM::ADCri:
2963  case ARM::SUBrr:
2964  case ARM::SUBri:
2965  case ARM::SBCrr:
2966  case ARM::SBCri:
2967  case ARM::t2RSBri:
2968  case ARM::t2ADDrr:
2969  case ARM::t2ADDri:
2970  case ARM::t2ADCrr:
2971  case ARM::t2ADCri:
2972  case ARM::t2SUBrr:
2973  case ARM::t2SUBri:
2974  case ARM::t2SBCrr:
2975  case ARM::t2SBCri:
2976  case ARM::ANDrr:
2977  case ARM::ANDri:
2978  case ARM::t2ANDrr:
2979  case ARM::t2ANDri:
2980  case ARM::ORRrr:
2981  case ARM::ORRri:
2982  case ARM::t2ORRrr:
2983  case ARM::t2ORRri:
2984  case ARM::EORrr:
2985  case ARM::EORri:
2986  case ARM::t2EORrr:
2987  case ARM::t2EORri:
2988  case ARM::t2LSRri:
2989  case ARM::t2LSRrr:
2990  case ARM::t2LSLri:
2991  case ARM::t2LSLrr:
2992  return true;
2993  }
2994 }
2995 
2996 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
2997 /// comparison into one that sets the zero bit in the flags register;
2998 /// Remove a redundant Compare instruction if an earlier instruction can set the
2999 /// flags in the same way as Compare.
3000 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
3001 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
3002 /// condition code of instructions which use the flags.
3003 bool ARMBaseInstrInfo::optimizeCompareInstr(
3004  MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
3005  int64_t CmpValue, const MachineRegisterInfo *MRI) const {
3006  // Get the unique definition of SrcReg.
3007  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3008  if (!MI) return false;
3009 
3010  // Masked compares sometimes use the same register as the corresponding 'and'.
3011  if (CmpMask != ~0) {
3012  if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
3013  MI = nullptr;
3014  for (MachineRegisterInfo::use_instr_iterator
3015  UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
3016  UI != UE; ++UI) {
3017  if (UI->getParent() != CmpInstr.getParent())
3018  continue;
3019  MachineInstr *PotentialAND = &*UI;
3020  if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
3021  isPredicated(*PotentialAND))
3022  continue;
3023  MI = PotentialAND;
3024  break;
3025  }
3026  if (!MI) return false;
3027  }
3028  }
3029 
3030  // Get ready to iterate backward from CmpInstr.
3031  MachineBasicBlock::iterator I = CmpInstr, E = MI,
3032  B = CmpInstr.getParent()->begin();
3033 
3034  // Early exit if CmpInstr is at the beginning of the BB.
3035  if (I == B) return false;
3036 
3037  // There are two possible candidates which can be changed to set CPSR:
3038  // One is MI, the other is a SUB or ADD instruction.
3039  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
3040  // ADDr[ri](r1, r2, X).
3041  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
3042  MachineInstr *SubAdd = nullptr;
3043  if (SrcReg2 != 0)
3044  // MI is not a candidate for CMPrr.
3045  MI = nullptr;
3046  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
3047  // Conservatively refuse to convert an instruction which isn't in the same
3048  // BB as the comparison.
3049  // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
3050  // Thus we cannot return here.
3051  if (CmpInstr.getOpcode() == ARM::CMPri ||
3052  CmpInstr.getOpcode() == ARM::t2CMPri ||
3053  CmpInstr.getOpcode() == ARM::tCMPi8)
3054  MI = nullptr;
3055  else
3056  return false;
3057  }
3058 
3059  bool IsThumb1 = false;
3060  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
3061  return false;
3062 
3063  // We also want to do this peephole for cases like this: if (a*b == 0),
3064  // and optimise away the CMP instruction from the generated code sequence:
3065  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
3066  // resulting from the select instruction, but these MOVS instructions for
3067  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
3068  // However, if we only have MOVS instructions in between the CMP and the
3069  // other instruction (the MULS in this example), then the CPSR is dead so we
3070  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
3071  // reordering and then continue the analysis hoping we can eliminate the
3072  // CMP. This peephole works on the vregs, so is still in SSA form. As a
3073  // consequence, the movs won't redefine/kill the MUL operands which would
3074  // make this reordering illegal.
3075  const TargetRegisterInfo *TRI = &getRegisterInfo();
3076  if (MI && IsThumb1) {
3077  --I;
3078  if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
3079  bool CanReorder = true;
3080  for (; I != E; --I) {
3081  if (I->getOpcode() != ARM::tMOVi8) {
3082  CanReorder = false;
3083  break;
3084  }
3085  }
3086  if (CanReorder) {
3087  MI = MI->removeFromParent();
3088  E = CmpInstr;
3089  CmpInstr.getParent()->insert(E, MI);
3090  }
3091  }
3092  I = CmpInstr;
3093  E = MI;
3094  }
3095 
3096  // Check that CPSR isn't set between the comparison instruction and the one we
3097  // want to change. At the same time, search for SubAdd.
3098  bool SubAddIsThumb1 = false;
3099  do {
3100  const MachineInstr &Instr = *--I;
3101 
3102  // Check whether CmpInstr can be made redundant by the current instruction.
3103  if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
3104  SubAddIsThumb1)) {
3105  SubAdd = &*I;
3106  break;
3107  }
3108 
3109  // Allow E (which was initially MI) to be SubAdd but do not search before E.
3110  if (I == E)
3111  break;
3112 
3113  if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
3114  Instr.readsRegister(ARM::CPSR, TRI))
3115  // This instruction modifies or uses CPSR after the one we want to
3116  // change. We can't do this transformation.
3117  return false;
3118 
3119  if (I == B) {
3120  // In some cases, we scan the use-list of an instruction for an AND;
3121  // that AND is in the same BB, but may not be scheduled before the
3122  // corresponding TST. In that case, bail out.
3123  //
3124  // FIXME: We could try to reschedule the AND.
3125  return false;
3126  }
3127  } while (true);
3128 
3129  // Return false if no candidates exist.
3130  if (!MI && !SubAdd)
3131  return false;
3132 
3133  // If we found a SubAdd, use it as it will be closer to the CMP
3134  if (SubAdd) {
3135  MI = SubAdd;
3136  IsThumb1 = SubAddIsThumb1;
3137  }
3138 
3139  // We can't use a predicated instruction - it doesn't always write the flags.
3140  if (isPredicated(*MI))
3141  return false;
3142 
3143  // Scan forward for the use of CPSR
3144  // When checking against MI: if it's a conditional code that requires
3145  // checking of the V bit or C bit, then this is not safe to do.
3146  // It is safe to remove CmpInstr if CPSR is redefined or killed.
3147  // If we are done with the basic block, we need to check whether CPSR is
3148  // live-out.
3149  SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3150  OperandsToUpdate;
3151  bool isSafe = false;
3152  I = CmpInstr;
3153  E = CmpInstr.getParent()->end();
3154  while (!isSafe && ++I != E) {
3155  const MachineInstr &Instr = *I;
3156  for (unsigned IO = 0, EO = Instr.getNumOperands();
3157  !isSafe && IO != EO; ++IO) {
3158  const MachineOperand &MO = Instr.getOperand(IO);
3159  if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3160  isSafe = true;
3161  break;
3162  }
3163  if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3164  continue;
3165  if (MO.isDef()) {
3166  isSafe = true;
3167  break;
3168  }
3169  // Condition code is after the operand before CPSR except for VSELs.
3170  ARMCC::CondCodes CC;
3171  bool IsInstrVSel = true;
3172  switch (Instr.getOpcode()) {
3173  default:
3174  IsInstrVSel = false;
3175  CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3176  break;
3177  case ARM::VSELEQD:
3178  case ARM::VSELEQS:
3179  case ARM::VSELEQH:
3180  CC = ARMCC::EQ;
3181  break;
3182  case ARM::VSELGTD:
3183  case ARM::VSELGTS:
3184  case ARM::VSELGTH:
3185  CC = ARMCC::GT;
3186  break;
3187  case ARM::VSELGED:
3188  case ARM::VSELGES:
3189  case ARM::VSELGEH:
3190  CC = ARMCC::GE;
3191  break;
3192  case ARM::VSELVSD:
3193  case ARM::VSELVSS:
3194  case ARM::VSELVSH:
3195  CC = ARMCC::VS;
3196  break;
3197  }
3198 
3199  if (SubAdd) {
3200  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3201  // on CMP needs to be updated to be based on SUB.
3202  // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3203  // needs to be modified.
3204  // Push the condition code operands to OperandsToUpdate.
3205  // If it is safe to remove CmpInstr, the condition code of these
3206  // operands will be modified.
3207  unsigned Opc = SubAdd->getOpcode();
3208  bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3209  Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3210  Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3211  Opc == ARM::tSUBi8;
3212  unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3213  if (!IsSub ||
3214  (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3215  SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3216  // VSel doesn't support condition code update.
3217  if (IsInstrVSel)
3218  return false;
3219  // Ensure we can swap the condition.
3220  ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3221  if (NewCC == ARMCC::AL)
3222  return false;
3223  OperandsToUpdate.push_back(
3224  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3225  }
3226  } else {
3227  // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3228  switch (CC) {
3229  case ARMCC::EQ: // Z
3230  case ARMCC::NE: // Z
3231  case ARMCC::MI: // N
3232  case ARMCC::PL: // N
3233  case ARMCC::AL: // none
3234  // CPSR can be used multiple times, we should continue.
3235  break;
3236  case ARMCC::HS: // C
3237  case ARMCC::LO: // C
3238  case ARMCC::VS: // V
3239  case ARMCC::VC: // V
3240  case ARMCC::HI: // C Z
3241  case ARMCC::LS: // C Z
3242  case ARMCC::GE: // N V
3243  case ARMCC::LT: // N V
3244  case ARMCC::GT: // Z N V
3245  case ARMCC::LE: // Z N V
3246  // The instruction uses the V bit or C bit which is not safe.
3247  return false;
3248  }
3249  }
3250  }
3251  }
3252 
3253  // If CPSR is not killed nor re-defined, we should check whether it is
3254  // live-out. If it is live-out, do not optimize.
3255  if (!isSafe) {
3256  MachineBasicBlock *MBB = CmpInstr.getParent();
3257  for (MachineBasicBlock *Succ : MBB->successors())
3258  if (Succ->isLiveIn(ARM::CPSR))
3259  return false;
3260  }
3261 
3262  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3263  // set CPSR so this is represented as an explicit output)
3264  if (!IsThumb1) {
3265  MI->getOperand(5).setReg(ARM::CPSR);
3266  MI->getOperand(5).setIsDef(true);
3267  }
3268  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3269  CmpInstr.eraseFromParent();
3270 
3271  // Modify the condition code of operands in OperandsToUpdate.
3272  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3273  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3274  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
3275  OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3276 
3277  MI->clearRegisterDeads(ARM::CPSR);
3278 
3279  return true;
3280 }
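// Worked example (illustrative): given, in SSA form,
//   %2 = SUBrr %0, %1      (optional CPSR operand unset)
//   CMPrr %0, %1
// the CMP is erased and the SUB's optional CPSR operand is turned into a
// real definition (effectively SUBS). If the CMP operands were swapped
// relative to the SUB, the condition codes on the CPSR users are rewritten,
// e.g. GT becomes LT.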
3281 
3282 bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3283  // Do not sink MI if it might be used to optimize a redundant compare.
3284  // We heuristically only look at the instruction immediately following MI to
3285  // avoid potentially searching the entire basic block.
3286  if (isPredicated(MI))
3287  return true;
3288  MachineBasicBlock::const_iterator Next = &MI;
3289  ++Next;
3290  Register SrcReg, SrcReg2;
3291  int64_t CmpMask, CmpValue;
3292  bool IsThumb1;
3293  if (Next != MI.getParent()->end() &&
3294  analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3295  isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3296  return false;
3297  return true;
3298 }
3299 
3300 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3301  Register Reg,
3302  MachineRegisterInfo *MRI) const {
3303  // Fold large immediates into add, sub, or, xor.
3304  unsigned DefOpc = DefMI.getOpcode();
3305  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
3306  return false;
3307  if (!DefMI.getOperand(1).isImm())
3308  // Could be t2MOVi32imm @xx
3309  return false;
3310 
3311  if (!MRI->hasOneNonDBGUse(Reg))
3312  return false;
3313 
3314  const MCInstrDesc &DefMCID = DefMI.getDesc();
3315  if (DefMCID.hasOptionalDef()) {
3316  unsigned NumOps = DefMCID.getNumOperands();
3317  const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3318  if (MO.getReg() == ARM::CPSR && !MO.isDead())
3319  // If DefMI defines CPSR and it is not dead, it's obviously not safe
3320  // to delete DefMI.
3321  return false;
3322  }
3323 
3324  const MCInstrDesc &UseMCID = UseMI.getDesc();
3325  if (UseMCID.hasOptionalDef()) {
3326  unsigned NumOps = UseMCID.getNumOperands();
3327  if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3328  // If the instruction sets the flag, do not attempt this optimization
3329  // since it may change the semantics of the code.
3330  return false;
3331  }
3332 
3333  unsigned UseOpc = UseMI.getOpcode();
3334  unsigned NewUseOpc = 0;
3335  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3336  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3337  bool Commute = false;
3338  switch (UseOpc) {
3339  default: return false;
3340  case ARM::SUBrr:
3341  case ARM::ADDrr:
3342  case ARM::ORRrr:
3343  case ARM::EORrr:
3344  case ARM::t2SUBrr:
3345  case ARM::t2ADDrr:
3346  case ARM::t2ORRrr:
3347  case ARM::t2EORrr: {
3348  Commute = UseMI.getOperand(2).getReg() != Reg;
3349  switch (UseOpc) {
3350  default: break;
3351  case ARM::ADDrr:
3352  case ARM::SUBrr:
3353  if (UseOpc == ARM::SUBrr && Commute)
3354  return false;
3355 
3356  // ADD/SUB are special because they're essentially the same operation, so
3357  // we can handle a larger range of immediates.
3358  if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3359  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3360  else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3361  ImmVal = -ImmVal;
3362  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3363  } else
3364  return false;
3365  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3366  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3367  break;
3368  case ARM::ORRrr:
3369  case ARM::EORrr:
3370  if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3371  return false;
3372  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3373  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3374  switch (UseOpc) {
3375  default: break;
3376  case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3377  case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3378  }
3379  break;
3380  case ARM::t2ADDrr:
3381  case ARM::t2SUBrr: {
3382  if (UseOpc == ARM::t2SUBrr && Commute)
3383  return false;
3384 
3385  // ADD/SUB are special because they're essentially the same operation, so
3386  // we can handle a larger range of immediates.
3387  const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3388  const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3389  const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3390  if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3391  NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3392  else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3393  ImmVal = -ImmVal;
3394  NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3395  } else
3396  return false;
3397  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3398  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3399  break;
3400  }
3401  case ARM::t2ORRrr:
3402  case ARM::t2EORrr:
3403  if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3404  return false;
3405  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3406  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3407  switch (UseOpc) {
3408  default: break;
3409  case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3410  case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3411  }
3412  break;
3413  }
3414  }
3415  }
3416 
3417  unsigned OpIdx = Commute ? 2 : 1;
3418  Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3419  bool isKill = UseMI.getOperand(OpIdx).isKill();
3420  const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3421  Register NewReg = MRI->createVirtualRegister(TRC);
3422  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3423  NewReg)
3424  .addReg(Reg1, getKillRegState(isKill))
3425  .addImm(SOImmValV1)
3426  .add(predOps(ARMCC::AL))
3427  .add(condCodeOp());
3428  UseMI.setDesc(get(NewUseOpc));
3429  UseMI.getOperand(1).setReg(NewReg);
3430  UseMI.getOperand(1).setIsKill();
3431  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3432  DefMI.eraseFromParent();
3433  // FIXME: t2ADDrr should be split, as different rules apply when writing to SP,
3434  // just as t2ADDri was split into [t2ADDri, t2ADDspImm].
3435  // Then the below code will not be needed, as the input/output register
3436  // classes will be rgpr or gprSP.
3437  // For now, we fix the UseMI operand explicitly here:
3438  switch(NewUseOpc){
3439  case ARM::t2ADDspImm:
3440  case ARM::t2SUBspImm:
3441  case ARM::t2ADDri:
3442  case ARM::t2SUBri:
3443  MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3444  }
3445  return true;
3446 }
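// As a rough illustration of the rewrite above (the exact constants come from
// getSOImmTwoPartFirst/Second, so the split shown here is only an example):
//   %1 = t2MOVi32imm 4660        ; 0x1234, not a single Thumb2 SO immediate
//   %2 = t2ADDrr %0, %1
// becomes
//   %n = t2ADDri %0, 4608        ; 0x1200
//   %2 = t2ADDri %n, 52          ; 0x34
// with the original t2MOVi32imm erased once its only use has been folded.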
3447 
3448 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3449  const MachineInstr &MI) {
3450  switch (MI.getOpcode()) {
3451  default: {
3452  const MCInstrDesc &Desc = MI.getDesc();
3453  int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3454  assert(UOps >= 0 && "bad # UOps");
3455  return UOps;
3456  }
3457 
3458  case ARM::LDRrs:
3459  case ARM::LDRBrs:
3460  case ARM::STRrs:
3461  case ARM::STRBrs: {
3462  unsigned ShOpVal = MI.getOperand(3).getImm();
3463  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3464  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3465  if (!isSub &&
3466  (ShImm == 0 ||
3467  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3468  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3469  return 1;
3470  return 2;
3471  }
3472 
3473  case ARM::LDRH:
3474  case ARM::STRH: {
3475  if (!MI.getOperand(2).getReg())
3476  return 1;
3477 
3478  unsigned ShOpVal = MI.getOperand(3).getImm();
3479  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3480  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3481  if (!isSub &&
3482  (ShImm == 0 ||
3483  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3484  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3485  return 1;
3486  return 2;
3487  }
3488 
3489  case ARM::LDRSB:
3490  case ARM::LDRSH:
3491  return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3492 
3493  case ARM::LDRSB_POST:
3494  case ARM::LDRSH_POST: {
3495  Register Rt = MI.getOperand(0).getReg();
3496  Register Rm = MI.getOperand(3).getReg();
3497  return (Rt == Rm) ? 4 : 3;
3498  }
3499 
3500  case ARM::LDR_PRE_REG:
3501  case ARM::LDRB_PRE_REG: {
3502  Register Rt = MI.getOperand(0).getReg();
3503  Register Rm = MI.getOperand(3).getReg();
3504  if (Rt == Rm)
3505  return 3;
3506  unsigned ShOpVal = MI.getOperand(4).getImm();
3507  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3508  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3509  if (!isSub &&
3510  (ShImm == 0 ||
3511  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3512  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3513  return 2;
3514  return 3;
3515  }
3516 
3517  case ARM::STR_PRE_REG:
3518  case ARM::STRB_PRE_REG: {
3519  unsigned ShOpVal = MI.getOperand(4).getImm();
3520  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3521  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3522  if (!isSub &&
3523  (ShImm == 0 ||
3524  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3525  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3526  return 2;
3527  return 3;
3528  }
3529 
3530  case ARM::LDRH_PRE:
3531  case ARM::STRH_PRE: {
3532  Register Rt = MI.getOperand(0).getReg();
3533  Register Rm = MI.getOperand(3).getReg();
3534  if (!Rm)
3535  return 2;
3536  if (Rt == Rm)
3537  return 3;
3538  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3539  }
3540 
3541  case ARM::LDR_POST_REG:
3542  case ARM::LDRB_POST_REG:
3543  case ARM::LDRH_POST: {
3544  Register Rt = MI.getOperand(0).getReg();
3545  Register Rm = MI.getOperand(3).getReg();
3546  return (Rt == Rm) ? 3 : 2;
3547  }
3548 
3549  case ARM::LDR_PRE_IMM:
3550  case ARM::LDRB_PRE_IMM:
3551  case ARM::LDR_POST_IMM:
3552  case ARM::LDRB_POST_IMM:
3553  case ARM::STRB_POST_IMM:
3554  case ARM::STRB_POST_REG:
3555  case ARM::STRB_PRE_IMM:
3556  case ARM::STRH_POST:
3557  case ARM::STR_POST_IMM:
3558  case ARM::STR_POST_REG:
3559  case ARM::STR_PRE_IMM:
3560  return 2;
3561 
3562  case ARM::LDRSB_PRE:
3563  case ARM::LDRSH_PRE: {
3564  Register Rm = MI.getOperand(3).getReg();
3565  if (Rm == 0)
3566  return 3;
3567  Register Rt = MI.getOperand(0).getReg();
3568  if (Rt == Rm)
3569  return 4;
3570  unsigned ShOpVal = MI.getOperand(4).getImm();
3571  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3572  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3573  if (!isSub &&
3574  (ShImm == 0 ||
3575  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3576  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3577  return 3;
3578  return 4;
3579  }
3580 
3581  case ARM::LDRD: {
3582  Register Rt = MI.getOperand(0).getReg();
3583  Register Rn = MI.getOperand(2).getReg();
3584  Register Rm = MI.getOperand(3).getReg();
3585  if (Rm)
3586  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3587  : 3;
3588  return (Rt == Rn) ? 3 : 2;
3589  }
3590 
3591  case ARM::STRD: {
3592  Register Rm = MI.getOperand(3).getReg();
3593  if (Rm)
3594  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3595  : 3;
3596  return 2;
3597  }
3598 
3599  case ARM::LDRD_POST:
3600  case ARM::t2LDRD_POST:
3601  return 3;
3602 
3603  case ARM::STRD_POST:
3604  case ARM::t2STRD_POST:
3605  return 4;
3606 
3607  case ARM::LDRD_PRE: {
3608  Register Rt = MI.getOperand(0).getReg();
3609  Register Rn = MI.getOperand(3).getReg();
3610  Register Rm = MI.getOperand(4).getReg();
3611  if (Rm)
3612  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3613  : 4;
3614  return (Rt == Rn) ? 4 : 3;
3615  }
3616 
3617  case ARM::t2LDRD_PRE: {
3618  Register Rt = MI.getOperand(0).getReg();
3619  Register Rn = MI.getOperand(3).getReg();
3620  return (Rt == Rn) ? 4 : 3;
3621  }
3622 
3623  case ARM::STRD_PRE: {
3624  Register Rm = MI.getOperand(4).getReg();
3625  if (Rm)
3626  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3627  : 4;
3628  return 3;
3629  }
3630 
3631  case ARM::t2STRD_PRE:
3632  return 3;
3633 
3634  case ARM::t2LDR_POST:
3635  case ARM::t2LDRB_POST:
3636  case ARM::t2LDRB_PRE:
3637  case ARM::t2LDRSBi12:
3638  case ARM::t2LDRSBi8:
3639  case ARM::t2LDRSBpci:
3640  case ARM::t2LDRSBs:
3641  case ARM::t2LDRH_POST:
3642  case ARM::t2LDRH_PRE:
3643  case ARM::t2LDRSBT:
3644  case ARM::t2LDRSB_POST:
3645  case ARM::t2LDRSB_PRE:
3646  case ARM::t2LDRSH_POST:
3647  case ARM::t2LDRSH_PRE:
3648  case ARM::t2LDRSHi12:
3649  case ARM::t2LDRSHi8:
3650  case ARM::t2LDRSHpci:
3651  case ARM::t2LDRSHs:
3652  return 2;
3653 
3654  case ARM::t2LDRDi8: {
3655  Register Rt = MI.getOperand(0).getReg();
3656  Register Rn = MI.getOperand(2).getReg();
3657  return (Rt == Rn) ? 3 : 2;
3658  }
3659 
3660  case ARM::t2STRB_POST:
3661  case ARM::t2STRB_PRE:
3662  case ARM::t2STRBs:
3663  case ARM::t2STRDi8:
3664  case ARM::t2STRH_POST:
3665  case ARM::t2STRH_PRE:
3666  case ARM::t2STRHs:
3667  case ARM::t2STR_POST:
3668  case ARM::t2STR_PRE:
3669  case ARM::t2STRs:
3670  return 2;
3671  }
3672 }
3673 
3674 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
3675 // can't be easily determined, return 0 (missing MachineMemOperand).
3676 //
3677 // FIXME: The current MachineInstr design does not support relying on machine
3678 // mem operands to determine the width of a memory access. Instead, we expect
3679 // the target to provide this information based on the instruction opcode and
3680 // operands. However, using MachineMemOperand is the best solution now for
3681 // two reasons:
3682 //
3683 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3684 // operands. This is much more dangerous than using the MachineMemOperand
3685 // sizes because CodeGen passes can insert/remove optional machine operands. In
3686 // fact, it's totally incorrect for preRA passes and appears to be wrong for
3687 // postRA passes as well.
3688 //
3689 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3690 // machine model that calls this should handle the unknown (zero size) case.
3691 //
3692 // Long term, we should require a target hook that verifies MachineMemOperand
3693 // sizes during MC lowering. That target hook should be local to MC lowering
3694 // because we can't ensure that it is aware of other MI forms. Doing this will
3695 // ensure that MachineMemOperands are correctly propagated through all passes.
3696 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3697  unsigned Size = 0;
3698  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3699  E = MI.memoperands_end();
3700  I != E; ++I) {
3701  Size += (*I)->getSize();
3702  }
3703  // FIXME: The scheduler currently can't handle values larger than 16. But
3704  // the values can actually go up to 32 for floating-point load/store
3705  // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3706  // operations isn't right; we could end up with "extra" memory operands for
3707 // various reasons, like tail merging combining two memory operations.
3708  return std::min(Size / 4, 16U);
3709 }
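// For example, an LDMIA carrying a single 24-byte MachineMemOperand reports
// 24 / 4 = 6 addresses, while an LDM with no memoperands reports 0 and the
// machine model has to treat the access width as unknown.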
3710 
3711 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3712  unsigned NumRegs) {
3713  unsigned UOps = 1 + NumRegs; // 1 for address computation.
3714  switch (Opc) {
3715  default:
3716  break;
3717  case ARM::VLDMDIA_UPD:
3718  case ARM::VLDMDDB_UPD:
3719  case ARM::VLDMSIA_UPD:
3720  case ARM::VLDMSDB_UPD:
3721  case ARM::VSTMDIA_UPD:
3722  case ARM::VSTMDDB_UPD:
3723  case ARM::VSTMSIA_UPD:
3724  case ARM::VSTMSDB_UPD:
3725  case ARM::LDMIA_UPD:
3726  case ARM::LDMDA_UPD:
3727  case ARM::LDMDB_UPD:
3728  case ARM::LDMIB_UPD:
3729  case ARM::STMIA_UPD:
3730  case ARM::STMDA_UPD:
3731  case ARM::STMDB_UPD:
3732  case ARM::STMIB_UPD:
3733  case ARM::tLDMIA_UPD:
3734  case ARM::tSTMIA_UPD:
3735  case ARM::t2LDMIA_UPD:
3736  case ARM::t2LDMDB_UPD:
3737  case ARM::t2STMIA_UPD:
3738  case ARM::t2STMDB_UPD:
3739  ++UOps; // One for base register writeback.
3740  break;
3741  case ARM::LDMIA_RET:
3742  case ARM::tPOP_RET:
3743  case ARM::t2LDMIA_RET:
3744  UOps += 2; // One for base reg wb, one for write to pc.
3745  break;
3746  }
3747  return UOps;
3748 }
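// E.g. an LDMIA_UPD of 4 registers is modeled as 1 (address) + 4 (registers)
// + 1 (writeback) = 6 microops, and an LDMIA_RET of the same 4 registers as
// 1 + 4 + 2 = 7.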
3749 
3750 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3751  const MachineInstr &MI) const {
3752  if (!ItinData || ItinData->isEmpty())
3753  return 1;
3754 
3755  const MCInstrDesc &Desc = MI.getDesc();
3756  unsigned Class = Desc.getSchedClass();
3757  int ItinUOps = ItinData->getNumMicroOps(Class);
3758  if (ItinUOps >= 0) {
3759  if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3760  return getNumMicroOpsSwiftLdSt(ItinData, MI);
3761 
3762  return ItinUOps;
3763  }
3764 
3765  unsigned Opc = MI.getOpcode();
3766  switch (Opc) {
3767  default:
3768  llvm_unreachable("Unexpected multi-uops instruction!");
3769  case ARM::VLDMQIA:
3770  case ARM::VSTMQIA:
3771  return 2;
3772 
3773  // The number of uOps for load / store multiple is determined by the number
3774  // of registers.
3775  //
3776  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3777  // same cycle. The scheduling for the first load / store must be done
3778  // separately by assuming the address is not 64-bit aligned.
3779  //
3780  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3781  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3782  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3783  case ARM::VLDMDIA:
3784  case ARM::VLDMDIA_UPD:
3785  case ARM::VLDMDDB_UPD:
3786  case ARM::VLDMSIA:
3787  case ARM::VLDMSIA_UPD:
3788  case ARM::VLDMSDB_UPD:
3789  case ARM::VSTMDIA:
3790  case ARM::VSTMDIA_UPD:
3791  case ARM::VSTMDDB_UPD:
3792  case ARM::VSTMSIA:
3793  case ARM::VSTMSIA_UPD:
3794  case ARM::VSTMSDB_UPD: {
3795  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3796  return (NumRegs / 2) + (NumRegs % 2) + 1;
3797  }
3798 
3799  case ARM::LDMIA_RET:
3800  case ARM::LDMIA:
3801  case ARM::LDMDA:
3802  case ARM::LDMDB:
3803  case ARM::LDMIB:
3804  case ARM::LDMIA_UPD:
3805  case ARM::LDMDA_UPD:
3806  case ARM::LDMDB_UPD:
3807  case ARM::LDMIB_UPD:
3808  case ARM::STMIA:
3809  case ARM::STMDA:
3810  case ARM::STMDB:
3811  case ARM::STMIB:
3812  case ARM::STMIA_UPD:
3813  case ARM::STMDA_UPD:
3814  case ARM::STMDB_UPD:
3815  case ARM::STMIB_UPD:
3816  case ARM::tLDMIA:
3817  case ARM::tLDMIA_UPD:
3818  case ARM::tSTMIA_UPD:
3819  case ARM::tPOP_RET:
3820  case ARM::tPOP:
3821  case ARM::tPUSH:
3822  case ARM::t2LDMIA_RET:
3823  case ARM::t2LDMIA:
3824  case ARM::t2LDMDB:
3825  case ARM::t2LDMIA_UPD:
3826  case ARM::t2LDMDB_UPD:
3827  case ARM::t2STMIA:
3828  case ARM::t2STMDB:
3829  case ARM::t2STMIA_UPD:
3830  case ARM::t2STMDB_UPD: {
3831  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3832  switch (Subtarget.getLdStMultipleTiming()) {
3833  case ARMSubtarget::SingleIssuePlusExtras:
3834  return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3835  case ARMSubtarget::SingleIssue:
3836  // Assume the worst.
3837  return NumRegs;
3838  case ARMSubtarget::DoubleIssue: {
3839  if (NumRegs < 4)
3840  return 2;
3841  // 4 registers would be issued: 2, 2.
3842  // 5 registers would be issued: 2, 2, 1.
3843  unsigned UOps = (NumRegs / 2);
3844  if (NumRegs % 2)
3845  ++UOps;
3846  return UOps;
3847  }
3848  case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3849  unsigned UOps = (NumRegs / 2);
3850  // If there is an odd number of registers or if it's not 64-bit aligned,
3851  // then it takes an extra AGU (Address Generation Unit) cycle.
3852  if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3853  (*MI.memoperands_begin())->getAlign() < Align(8))
3854  ++UOps;
3855  return UOps;
3856  }
3857  }
3858  }
3859  }
3860  llvm_unreachable("Didn't find the number of microops");
3861 }
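// Worked example for the last (check-unaligned) case above: a 64-bit-aligned
// LDMIA of 4 registers costs 4 / 2 = 2 microops; make the register count odd,
// or the address not provably 8-byte aligned, and it costs one more.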
3862 
3863 int
3864 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3865  const MCInstrDesc &DefMCID,
3866  unsigned DefClass,
3867  unsigned DefIdx, unsigned DefAlign) const {
3868  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3869  if (RegNo <= 0)
3870  // Def is the address writeback.
3871  return ItinData->getOperandCycle(DefClass, DefIdx);
3872 
3873  int DefCycle;
3874  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3875  // (regno / 2) + (regno % 2) + 1
3876  DefCycle = RegNo / 2 + 1;
3877  if (RegNo % 2)
3878  ++DefCycle;
3879  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3880  DefCycle = RegNo;
3881  bool isSLoad = false;
3882 
3883  switch (DefMCID.getOpcode()) {
3884  default: break;
3885  case ARM::VLDMSIA:
3886  case ARM::VLDMSIA_UPD:
3887  case ARM::VLDMSDB_UPD:
3888  isSLoad = true;
3889  break;
3890  }
3891 
3892  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3893  // then it takes an extra cycle.
3894  if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3895  ++DefCycle;
3896  } else {
3897  // Assume the worst.
3898  DefCycle = RegNo + 2;
3899  }
3900 
3901  return DefCycle;
3902 }
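// For instance, the third D register loaded by a VLDMDIA (RegNo == 3) is
// modeled on Cortex-A8/A7 as ready at cycle 3/2 + 1 + 1 = 3 (the extra +1 for
// the odd index), whereas the A9/Swift path starts from cycle 3 and only adds
// one for an odd 'S' register count or a misaligned access.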
3903 
3904 int
3905 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3906  const MCInstrDesc &DefMCID,
3907  unsigned DefClass,
3908  unsigned DefIdx, unsigned DefAlign) const {
3909  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3910  if (RegNo <= 0)
3911  // Def is the address writeback.
3912  return ItinData->getOperandCycle(DefClass, DefIdx);
3913 
3914  int DefCycle;
3915  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3916  // 4 registers would be issued: 1, 2, 1.
3917  // 5 registers would be issued: 1, 2, 2.
3918  DefCycle = RegNo / 2;
3919  if (DefCycle < 1)
3920  DefCycle = 1;
3921  // Result latency is issue cycle + 2: E2.
3922  DefCycle += 2;
3923  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3924  DefCycle = (RegNo / 2);
3925  // If there is an odd number of registers or if it's not 64-bit aligned,
3926  // then it takes an extra AGU (Address Generation Unit) cycle.
3927  if ((RegNo % 2) || DefAlign < 8)
3928  ++DefCycle;
3929  // Result latency is AGU cycles + 2.
3930  DefCycle += 2;
3931  } else {
3932  // Assume the worst.
3933  DefCycle = RegNo + 2;
3934  }
3935 
3936  return DefCycle;
3937 }
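// Example: the fourth register of an LDMIA (RegNo == 4) gets DefCycle
// max(4/2, 1) + 2 = 4 on Cortex-A8/A7, and 4/2 + 2 = 4 on A9-like cores,
// plus one more when the register count is odd or the access is not 64-bit
// aligned.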
3938 
3939 int
3940 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3941  const MCInstrDesc &UseMCID,
3942  unsigned UseClass,
3943  unsigned UseIdx, unsigned UseAlign) const {
3944  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3945  if (RegNo <= 0)
3946  return ItinData->getOperandCycle(UseClass, UseIdx);
3947 
3948  int UseCycle;
3949  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3950  // (regno / 2) + (regno % 2) + 1
3951  UseCycle = RegNo / 2 + 1;
3952  if (RegNo % 2)
3953  ++UseCycle;
3954  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3955  UseCycle = RegNo;
3956  bool isSStore = false;
3957 
3958  switch (UseMCID.getOpcode()) {
3959  default: break;
3960  case ARM::VSTMSIA:
3961  case ARM::VSTMSIA_UPD:
3962  case ARM::VSTMSDB_UPD:
3963  isSStore = true;
3964  break;
3965  }
3966 
3967  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3968  // then it takes an extra cycle.
3969  if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3970  ++UseCycle;
3971  } else {
3972  // Assume the worst.
3973  UseCycle = RegNo + 2;
3974  }
3975 
3976  return UseCycle;
3977 }
3978 
3979 int
3980 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3981  const MCInstrDesc &UseMCID,
3982  unsigned UseClass,
3983  unsigned UseIdx, unsigned UseAlign) const {
3984  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3985  if (RegNo <= 0)
3986  return ItinData->getOperandCycle(UseClass, UseIdx);
3987 
3988  int UseCycle;
3989  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3990  UseCycle = RegNo / 2;
3991  if (UseCycle < 2)
3992  UseCycle = 2;
3993  // Read in E3.
3994  UseCycle += 2;
3995  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3996  UseCycle = (RegNo / 2);
3997  // If there is an odd number of registers or if it's not 64-bit aligned,
3998  // then it takes an extra AGU (Address Generation Unit) cycle.
3999  if ((RegNo % 2) || UseAlign < 8)
4000  ++UseCycle;
4001  } else {
4002  // Assume the worst.
4003  UseCycle = 1;
4004  }
4005  return UseCycle;
4006 }
4007 
4008 int
4009 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4010  const MCInstrDesc &DefMCID,
4011  unsigned DefIdx, unsigned DefAlign,
4012  const MCInstrDesc &UseMCID,
4013  unsigned UseIdx, unsigned UseAlign) const {
4014  unsigned DefClass = DefMCID.getSchedClass();
4015  unsigned UseClass = UseMCID.getSchedClass();
4016 
4017  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
4018  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
4019 
4020  // This may be a def / use of a variable_ops instruction; the operand
4021  // latency might be determinable dynamically. Let the target try to
4022  // figure it out.
4023  int DefCycle = -1;
4024  bool LdmBypass = false;
4025  switch (DefMCID.getOpcode()) {
4026  default:
4027  DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4028  break;
4029 
4030  case ARM::VLDMDIA:
4031  case ARM::VLDMDIA_UPD:
4032  case ARM::VLDMDDB_UPD:
4033  case ARM::VLDMSIA:
4034  case ARM::VLDMSIA_UPD:
4035  case ARM::VLDMSDB_UPD:
4036  DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4037  break;
4038 
4039  case ARM::LDMIA_RET:
4040  case ARM::LDMIA:
4041  case ARM::LDMDA:
4042  case ARM::LDMDB:
4043  case ARM::LDMIB:
4044  case ARM::LDMIA_UPD:
4045  case ARM::LDMDA_UPD:
4046  case ARM::LDMDB_UPD:
4047  case ARM::LDMIB_UPD:
4048  case ARM::tLDMIA:
4049  case ARM::tLDMIA_UPD:
4050  case ARM::tPUSH:
4051  case ARM::t2LDMIA_RET:
4052  case ARM::t2LDMIA:
4053  case ARM::t2LDMDB:
4054  case ARM::t2LDMIA_UPD:
4055  case ARM::t2LDMDB_UPD:
4056  LdmBypass = true;
4057  DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4058  break;
4059  }
4060 
4061  if (DefCycle == -1)
4062  // We can't seem to determine the result latency of the def; assume it's 2.
4063  DefCycle = 2;
4064 
4065  int UseCycle = -1;
4066  switch (UseMCID.getOpcode()) {
4067  default:
4068  UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
4069  break;
4070 
4071  case ARM::VSTMDIA:
4072  case ARM::VSTMDIA_UPD:
4073  case ARM::VSTMDDB_UPD:
4074  case ARM::VSTMSIA:
4075  case ARM::VSTMSIA_UPD:
4076  case ARM::VSTMSDB_UPD:
4077  UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4078  break;
4079 
4080  case ARM::STMIA:
4081  case ARM::STMDA:
4082  case ARM::STMDB:
4083  case ARM::STMIB:
4084  case ARM::STMIA_UPD:
4085  case ARM::STMDA_UPD:
4086  case ARM::STMDB_UPD:
4087  case ARM::STMIB_UPD:
4088  case ARM::tSTMIA_UPD:
4089  case ARM::tPOP_RET:
4090  case ARM::tPOP:
4091  case ARM::t2STMIA:
4092  case ARM::t2STMDB:
4093  case ARM::t2STMIA_UPD:
4094  case ARM::t2STMDB_UPD:
4095  UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4096  break;
4097  }
4098 
4099  if (UseCycle == -1)
4100  // Assume it's read in the first stage.
4101  UseCycle = 1;
4102 
4103  UseCycle = DefCycle - UseCycle + 1;
4104  if (UseCycle > 0) {
4105  if (LdmBypass) {
4106  // It's a variable_ops instruction so we can't use DefIdx here. Just use
4107  // first def operand.
4108  if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
4109  UseClass, UseIdx))
4110  --UseCycle;
4111  } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
4112  UseClass, UseIdx)) {
4113  --UseCycle;
4114  }
4115  }
4116 
4117  return UseCycle;
4118 }
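// Tying the helpers together: if the LDM def becomes available at
// DefCycle == 4 and the STM use reads it at UseCycle == 3, the reported
// operand latency is 4 - 3 + 1 = 2, and it is reduced by one if the itinerary
// has a pipeline forwarding path between the two scheduling classes.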
4119 
4120 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
4121  const MachineInstr *MI, unsigned Reg,
4122  unsigned &DefIdx, unsigned &Dist) {
4123  Dist = 0;
4124 
4125  MachineBasicBlock::const_iterator I = MI; ++I;
4126  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
4127  assert(II->isInsideBundle() && "Empty bundle?");
4128 
4129  int Idx = -1;
4130  while (II->isInsideBundle()) {
4131  Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
4132  if (Idx != -1)
4133  break;
4134  --II;
4135  ++Dist;
4136  }
4137 
4138  assert(Idx != -1 && "Cannot find bundled definition!");
4139  DefIdx = Idx;
4140  return &*II;
4141 }
4142 
4143 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4144  const MachineInstr &MI, unsigned Reg,
4145  unsigned &UseIdx, unsigned &Dist) {
4146  Dist = 0;
4147 
4148  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
4149  assert(II->isInsideBundle() && "Empty bundle?");
4150  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4151 
4152  // FIXME: This doesn't properly handle multiple uses.
4153  int Idx = -1;
4154  while (II != E && II->isInsideBundle()) {
4155  Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
4156  if (Idx != -1)
4157  break;
4158  if (II->getOpcode() != ARM::t2IT)
4159  ++Dist;
4160  ++II;
4161  }
4162 
4163  if (Idx == -1) {
4164  Dist = 0;
4165  return nullptr;
4166  }
4167 
4168  UseIdx = Idx;
4169  return &*II;
4170 }
4171 
4172 /// Return the number of cycles to add to (or subtract from) the static
4173 /// itinerary based on the def opcode and alignment. The caller will ensure that
4174 /// adjusted latency is at least one cycle.
4175 static int adjustDefLatency(const ARMSubtarget &Subtarget,
4176  const MachineInstr &DefMI,
4177  const MCInstrDesc &DefMCID, unsigned DefAlign) {
4178  int Adjust = 0;
4179  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4180  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4181  // variants are one cycle cheaper.
4182  switch (DefMCID.getOpcode()) {
4183  default: break;
4184  case ARM::LDRrs:
4185  case ARM::LDRBrs: {
4186  unsigned ShOpVal = DefMI.getOperand(3).getImm();
4187  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4188  if (ShImm == 0 ||
4189  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4190  --Adjust;
4191  break;
4192  }
4193  case ARM::t2LDRs:
4194  case ARM::t2LDRBs:
4195  case ARM::t2LDRHs:
4196  case ARM::t2LDRSHs: {
4197  // Thumb2 mode: lsl only.
4198  unsigned ShAmt = DefMI.getOperand(3).getImm();
4199  if (ShAmt == 0 || ShAmt == 2)
4200  --Adjust;
4201  break;
4202  }
4203  }
4204  } else if (Subtarget.isSwift()) {
4205  // FIXME: Properly handle all of the latency adjustments for address
4206  // writeback.
4207  switch (DefMCID.getOpcode()) {
4208  default: break;
4209  case ARM::LDRrs:
4210  case ARM::LDRBrs: {
4211  unsigned ShOpVal = DefMI.getOperand(3).getImm();
4212  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4213  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4214  if (!isSub &&
4215  (ShImm == 0 ||
4216  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4217  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4218  Adjust -= 2;
4219  else if (!isSub &&
4220  ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4221  --Adjust;
4222  break;
4223  }
4224  case ARM::t2LDRs:
4225  case ARM::t2LDRBs:
4226  case ARM::t2LDRHs:
4227  case ARM::t2LDRSHs: {
4228  // Thumb2 mode: lsl only.
4229  unsigned ShAmt = DefMI.getOperand(3).getImm();
4230  if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4231  Adjust -= 2;
4232  break;
4233  }
4234  }
4235  }
4236 
4237  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4238  switch (DefMCID.getOpcode()) {
4239  default: break;
4240  case ARM::VLD1q8:
4241  case ARM::VLD1q16:
4242  case ARM::VLD1q32:
4243  case ARM::VLD1q64:
4244  case ARM::VLD1q8wb_fixed:
4245  case ARM::VLD1q16wb_fixed:
4246  case ARM::VLD1q32wb_fixed:
4247  case ARM::VLD1q64wb_fixed:
4248  case ARM::VLD1q8wb_register:
4249  case ARM::VLD1q16wb_register:
4250  case ARM::VLD1q32wb_register:
4251  case ARM::VLD1q64wb_register:
4252  case ARM::VLD2d8:
4253  case ARM::VLD2d16:
4254  case ARM::VLD2d32:
4255  case ARM::VLD2q8:
4256  case ARM::VLD2q16:
4257  case ARM::VLD2q32:
4258  case ARM::VLD2d8wb_fixed:
4259  case ARM::VLD2d16wb_fixed:
4260  case ARM::VLD2d32wb_fixed:
4261  case ARM::VLD2q8wb_fixed:
4262  case ARM::VLD2q16wb_fixed:
4263  case ARM::VLD2q32wb_fixed:
4264  case ARM::VLD2d8wb_register:
4265  case ARM::VLD2d16wb_register:
4266  case ARM::VLD2d32wb_register:
4267  case ARM::VLD2q8wb_register:
4268  case ARM::VLD2q16wb_register:
4269  case ARM::VLD2q32wb_register:
4270  case ARM::VLD3d8:
4271  case ARM::VLD3d16:
4272  case ARM::VLD3d32:
4273  case ARM::VLD1d64T:
4274  case ARM::VLD3d8_UPD:
4275  case ARM::VLD3d16_UPD:
4276  case ARM::VLD3d32_UPD:
4277  case ARM::VLD1d64Twb_fixed:
4278  case ARM::VLD1d64Twb_register:
4279  case ARM::VLD3q8_UPD:
4280  case ARM::VLD3q16_UPD:
4281  case ARM::VLD3q32_UPD:
4282  case ARM::VLD4d8:
4283  case ARM::VLD4d16:
4284  case ARM::VLD4d32:
4285  case ARM::VLD1d64Q:
4286  case ARM::VLD4d8_UPD:
4287  case ARM::VLD4d16_UPD:
4288  case ARM::VLD4d32_UPD:
4289  case ARM::VLD1d64Qwb_fixed:
4290  case ARM::VLD1d64Qwb_register:
4291  case ARM::VLD4q8_UPD:
4292  case ARM::VLD4q16_UPD:
4293  case ARM::VLD4q32_UPD:
4294  case ARM::VLD1DUPq8:
4295  case ARM::VLD1DUPq16:
4296  case ARM::VLD1DUPq32:
4297  case ARM::VLD1DUPq8wb_fixed:
4298  case ARM::VLD1DUPq16wb_fixed:
4299  case ARM::VLD1DUPq32wb_fixed:
4300  case ARM::VLD1DUPq8wb_register:
4301  case ARM::VLD1DUPq16wb_register:
4302  case ARM::VLD1DUPq32wb_register:
4303  case ARM::VLD2DUPd8:
4304  case ARM::VLD2DUPd16:
4305  case ARM::VLD2DUPd32:
4306  case ARM::VLD2DUPd8wb_fixed:
4307  case ARM::VLD2DUPd16wb_fixed:
4308  case ARM::VLD2DUPd32wb_fixed:
4309  case ARM::VLD2DUPd8wb_register:
4310  case ARM::VLD2DUPd16wb_register:
4311  case ARM::VLD2DUPd32wb_register:
4312  case ARM::VLD4DUPd8:
4313  case ARM::VLD4DUPd16:
4314  case ARM::VLD4DUPd32:
4315  case ARM::VLD4DUPd8_UPD:
4316  case ARM::VLD4DUPd16_UPD:
4317  case ARM::VLD4DUPd32_UPD:
4318  case ARM::VLD1LNd8:
4319  case ARM::VLD1LNd16:
4320  case ARM::VLD1LNd32:
4321  case ARM::VLD1LNd8_UPD:
4322  case ARM::VLD1LNd16_UPD:
4323  case ARM::VLD1LNd32_UPD:
4324  case ARM::VLD2LNd8:
4325  case ARM::VLD2LNd16:
4326  case ARM::VLD2LNd32:
4327  case ARM::VLD2LNq16:
4328  case ARM::VLD2LNq32:
4329  case ARM::VLD2LNd8_UPD:
4330  case ARM::VLD2LNd16_UPD:
4331  case ARM::VLD2LNd32_UPD:
4332  case ARM::VLD2LNq16_UPD:
4333  case ARM::VLD2LNq32_UPD:
4334  case ARM::VLD4LNd8:
4335  case ARM::VLD4LNd16:
4336  case ARM::VLD4LNd32:
4337  case ARM::VLD4LNq16:
4338  case ARM::VLD4LNq32:
4339  case ARM::VLD4LNd8_UPD:
4340  case ARM::VLD4LNd16_UPD:
4341  case ARM::VLD4LNd32_UPD:
4342  case ARM::VLD4LNq16_UPD:
4343  case ARM::VLD4LNq32_UPD:
4344  // If the address is not 64-bit aligned, the latencies of these
4345  // instructions increase by one.
4346  ++Adjust;
4347  break;
4348  }
4349  }
4350  return Adjust;
4351 }
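// For example, an LDRrs whose shifter operand is "lsl #2" is adjusted by -1
// on Cortex-A8/A9-class cores and by -2 on Swift, while a VLD2d8 through an
// address that is not 64-bit aligned is adjusted by +1.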
4352 
4353 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4354  const MachineInstr &DefMI,
4355  unsigned DefIdx,
4356  const MachineInstr &UseMI,
4357  unsigned UseIdx) const {
4358  // No operand latency. The caller may fall back to getInstrLatency.
4359  if (!ItinData || ItinData->isEmpty())
4360  return -1;
4361 
4362  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4363  Register Reg = DefMO.getReg();
4364 
4365  const MachineInstr *ResolvedDefMI = &DefMI;
4366  unsigned DefAdj = 0;
4367  if (DefMI.isBundle())
4368  ResolvedDefMI =
4369  getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4370  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4371  ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4372  return 1;
4373  }
4374 
4375  const MachineInstr *ResolvedUseMI = &UseMI;
4376  unsigned UseAdj = 0;
4377  if (UseMI.isBundle()) {
4378  ResolvedUseMI =
4379  getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4380  if (!ResolvedUseMI)
4381  return -1;
4382  }
4383 
4384  return getOperandLatencyImpl(
4385  ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4386  Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4387 }
4388 
4389 int ARMBaseInstrInfo::getOperandLatencyImpl(
4390  const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4391  unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4392  const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4393  unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4394  if (Reg == ARM::CPSR) {
4395  if (DefMI.getOpcode() == ARM::FMSTAT) {
4396  // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4397  return Subtarget.isLikeA9() ? 1 : 20;
4398  }
4399 
4400  // CPSR set and branch can be paired in the same cycle.
4401  if (UseMI.isBranch())
4402  return 0;
4403 
4404  // Otherwise it takes the instruction latency (generally one).
4405  unsigned Latency = getInstrLatency(ItinData, DefMI);
4406 
4407  // For Thumb2 and -Os, prefer scheduling a CPSR-setting instruction close to
4408  // its uses. Instructions which are otherwise scheduled between them may
4409  // incur a code size penalty (not able to use the CPSR setting 16-bit
4410  // instructions).
4411  if (Latency > 0 && Subtarget.isThumb2()) {
4412  const MachineFunction *MF = DefMI.getParent()->getParent();
4413  // FIXME: Use Function::hasOptSize().
4414  if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4415  --Latency;
4416  }
4417  return Latency;
4418  }
4419 
4420  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4421  return -1;
4422 
4423  unsigned DefAlign = DefMI.hasOneMemOperand()
4424  ? (*DefMI.memoperands_begin())->getAlign().value()
4425  : 0;
4426  unsigned UseAlign = UseMI.hasOneMemOperand()
4427  ? (*UseMI.memoperands_begin())->getAlign().value()
4428  : 0;
4429 
4430  // Get the itinerary's latency if possible, and handle variable_ops.
4431  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
4432  UseIdx, UseAlign);
4433  // Unable to find operand latency. The caller may resort to getInstrLatency.
4434  if (Latency < 0)
4435  return Latency;
4436 
4437  // Adjust for IT block position.
4438  int Adj = DefAdj + UseAdj;
4439 
4440  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4441  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4442  if (Adj >= 0 || (int)Latency > -Adj) {
4443  return Latency + Adj;
4444  }
4445  // Return the itinerary latency, which may be zero but not less than zero.
4446  return Latency;
4447 }
4448 
4449 int
4450 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4451  SDNode *DefNode, unsigned DefIdx,
4452  SDNode *UseNode, unsigned UseIdx) const {
4453  if (!DefNode->isMachineOpcode())
4454  return 1;
4455 
4456  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4457 
4458  if (isZeroCost(DefMCID.Opcode))
4459  return 0;
4460 
4461  if (!ItinData || ItinData->isEmpty())
4462  return DefMCID.mayLoad() ? 3 : 1;
4463 
4464  if (!UseNode->isMachineOpcode()) {
4465  int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4466  int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4467  int Threshold = 1 + Adj;
4468  return Latency <= Threshold ? 1 : Latency - Adj;
4469  }
4470 
4471  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4472  auto *DefMN = cast<MachineSDNode>(DefNode);
4473  unsigned DefAlign = !DefMN->memoperands_empty()
4474  ? (*DefMN->memoperands_begin())->getAlign().value()
4475  : 0;
4476  auto *UseMN = cast<MachineSDNode>(UseNode);
4477  unsigned UseAlign = !UseMN->memoperands_empty()
4478  ? (*UseMN->memoperands_begin())->getAlign().value()
4479  : 0;
4480  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4481  UseMCID, UseIdx, UseAlign);
4482 
4483  if (Latency > 1 &&
4484  (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4485  Subtarget.isCortexA7())) {
4486  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4487  // variants are one cycle cheaper.
4488  switch (DefMCID.getOpcode()) {
4489  default: break;
4490  case ARM::LDRrs:
4491  case ARM::LDRBrs: {
4492  unsigned ShOpVal =
4493  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4494  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4495  if (ShImm == 0 ||
4496  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4497  --Latency;
4498  break;
4499  }
4500  case ARM::t2LDRs:
4501  case ARM::t2LDRBs:
4502  case ARM::t2LDRHs:
4503  case ARM::t2LDRSHs: {
4504  // Thumb2 mode: lsl only.
4505  unsigned ShAmt =
4506  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4507  if (ShAmt == 0 || ShAmt == 2)
4508  --Latency;
4509  break;
4510  }
4511  }
4512  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4513  // FIXME: Properly handle all of the latency adjustments for address
4514  // writeback.
4515  switch (DefMCID.getOpcode()) {
4516  default: break;
4517  case ARM::LDRrs:
4518  case ARM::LDRBrs: {
4519  unsigned ShOpVal =
4520  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4521  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4522  if (ShImm == 0 ||
4523  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4524  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4525  Latency -= 2;
4526  else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4527  --Latency;
4528  break;
4529  }
4530  case ARM::t2LDRs:
4531  case ARM::t2LDRBs:
4532  case ARM::t2LDRHs:
4533  case ARM::t2LDRSHs:
4534  // Thumb2 mode: lsl 0-3 only.
4535  Latency -= 2;
4536  break;
4537  }
4538  }
4539 
4540  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4541  switch (DefMCID.getOpcode()) {
4542  default: break;
4543  case ARM::VLD1q8:
4544  case ARM::VLD1q16:
4545  case ARM::VLD1q32:
4546  case ARM::VLD1q64:
4547  case ARM::VLD1q8wb_register:
4548  case ARM::VLD1q16wb_register:
4549  case ARM::VLD1q32wb_register:
4550  case ARM::VLD1q64wb_register:
4551  case ARM::VLD1q8wb_fixed:
4552  case ARM::VLD1q16wb_fixed:
4553  case ARM::VLD1q32wb_fixed:
4554  case ARM::VLD1q64wb_fixed:
4555  case ARM::VLD2d8:
4556  case ARM::VLD2d16:
4557  case ARM::VLD2d32:
4558  case ARM::VLD2q8Pseudo:
4559  case ARM::VLD2q16Pseudo:
4560  case ARM::VLD2q32Pseudo:
4561  case ARM::VLD2d8wb_fixed:
4562  case ARM::VLD2d16wb_fixed:
4563  case ARM::VLD2d32wb_fixed:
4564  case ARM::VLD2q8PseudoWB_fixed:
4565  case ARM::VLD2q16PseudoWB_fixed:
4566  case ARM::VLD2q32PseudoWB_fixed:
4567  case ARM::VLD2d8wb_register:
4568  case ARM::VLD2d16wb_register:
4569  case ARM::VLD2d32wb_register:
4570  case ARM::VLD2q8PseudoWB_register:
4571  case ARM::VLD2q16PseudoWB_register:
4572  case ARM::VLD2q32PseudoWB_register:
4573  case ARM::VLD3d8Pseudo:
4574  case ARM::VLD3d16Pseudo:
4575  case ARM::VLD3d32Pseudo:
4576  case ARM::VLD1d8TPseudo:
4577  case ARM::VLD1d16TPseudo:
4578  case ARM::VLD1d32TPseudo:
4579  case ARM::VLD1d64TPseudo:
4580  case ARM::VLD1d64TPseudoWB_fixed:
4581  case ARM::VLD1d64TPseudoWB_register:
4582  case ARM::VLD3d8Pseudo_UPD:
4583  case ARM::VLD3d16Pseudo_UPD:
4584  case ARM::VLD3d32Pseudo_UPD:
4585  case ARM::VLD3q8Pseudo_UPD:
4586  case ARM::VLD3q16Pseudo_UPD:
4587  case ARM::VLD3q32Pseudo_UPD:
4588  case ARM::VLD3q8oddPseudo:
4589  case ARM::VLD3q16oddPseudo:
4590  case ARM::VLD3q32oddPseudo:
4591  case ARM::VLD3q8oddPseudo_UPD:
4592  case ARM::VLD3q16oddPseudo_UPD:
4593  case ARM::VLD3q32oddPseudo_UPD:
4594  case ARM::VLD4d8Pseudo:
4595  case ARM::VLD4d16Pseudo:
4596  case ARM::VLD4d32Pseudo:
4597  case ARM::VLD1d8QPseudo:
4598  case ARM::VLD1d16QPseudo:
4599  case ARM::VLD1d32QPseudo:
4600  case ARM::VLD1d64QPseudo:
4601  case ARM::VLD1d64QPseudoWB_fixed:
4602  case ARM::VLD1d64QPseudoWB_register:
4603  case ARM::VLD1q8HighQPseudo:
4604  case ARM::VLD1q8LowQPseudo_UPD:
4605  case ARM::VLD1q8HighTPseudo:
4606  case ARM::VLD1q8LowTPseudo_UPD:
4607  case ARM::VLD1q16HighQPseudo:
4608  case ARM::VLD1q16LowQPseudo_UPD:
4609  case ARM::VLD1q16HighTPseudo:
4610  case ARM::VLD1q16LowTPseudo_UPD:
4611  case ARM::VLD1q32HighQPseudo:
4612  case ARM::VLD1q32LowQPseudo_UPD:
4613  case ARM::VLD1q32HighTPseudo:
4614  case ARM::VLD1q32LowTPseudo_UPD:
4615  case ARM::VLD1q64HighQPseudo:
4616  case ARM::VLD1q64LowQPseudo_UPD:
4617  case ARM::VLD1q64HighTPseudo:
4618  case ARM::VLD1q64LowTPseudo_UPD:
4619  case ARM::VLD4d8Pseudo_UPD:
4620  case ARM::VLD4d16Pseudo_UPD:
4621  case ARM::VLD4d32Pseudo_UPD:
4622  case ARM::VLD4q8Pseudo_UPD:
4623  case ARM::VLD4q16Pseudo_UPD:
4624  case ARM::VLD4q32Pseudo_UPD:
4625  case ARM::VLD4q8oddPseudo:
4626  case ARM::VLD4q16oddPseudo:
4627  case ARM::VLD4q32oddPseudo:
4628  case ARM::VLD4q8oddPseudo_UPD:
4629  case ARM::VLD4q16oddPseudo_UPD:
4630  case ARM::VLD4q32oddPseudo_UPD:
4631  case ARM::VLD1DUPq8:
4632  case ARM::VLD1DUPq16:
4633  case ARM::VLD1DUPq32:
4634  case ARM::VLD1DUPq8wb_fixed:
4635  case ARM::VLD1DUPq16wb_fixed:
4636  case ARM::VLD1DUPq32wb_fixed:
4637  case ARM::VLD1DUPq8wb_register:
4638  case ARM::VLD1DUPq16wb_register:
4639  case ARM::VLD1DUPq32wb_register:
4640  case ARM::VLD2DUPd8:
4641  case ARM::VLD2DUPd16:
4642  case ARM::VLD2DUPd32:
4643  case ARM::VLD2DUPd8wb_fixed:
4644  case ARM::VLD2DUPd16wb_fixed:
4645  case ARM::VLD2DUPd32wb_fixed:
4646  case ARM::VLD2DUPd8wb_register:
4647  case ARM::VLD2DUPd16wb_register:
4648  case ARM::VLD2DUPd32wb_register:
4649  case ARM::VLD2DUPq8EvenPseudo:
4650  case ARM::VLD2DUPq8OddPseudo:
4651  case ARM::VLD2DUPq16EvenPseudo:
4652  case ARM::VLD2DUPq16OddPseudo:
4653  case ARM::VLD2DUPq32EvenPseudo:
4654  case ARM::VLD2DUPq32OddPseudo:
4655  case ARM::VLD3DUPq8EvenPseudo:
4656  case ARM::VLD3DUPq8OddPseudo:
4657  case ARM::VLD3DUPq16EvenPseudo:
4658  case ARM::VLD3DUPq16OddPseudo:
4659  case ARM::VLD3DUPq32EvenPseudo:
4660  case ARM::VLD3DUPq32OddPseudo:
4661  case ARM::VLD4DUPd8Pseudo:
4662  case ARM::VLD4DUPd16Pseudo:
4663  case ARM::VLD4DUPd32Pseudo:
4664  case ARM::VLD4DUPd8Pseudo_UPD:
4665  case ARM::VLD4DUPd16Pseudo_UPD:
4666  case ARM::VLD4DUPd32Pseudo_UPD:
4667  case ARM::VLD4DUPq8EvenPseudo:
4668  case ARM::VLD4DUPq8OddPseudo:
4669  case ARM::VLD4DUPq16EvenPseudo:
4670  case ARM::VLD4DUPq16OddPseudo:
4671  case ARM::VLD4DUPq32EvenPseudo:
4672  case ARM::VLD4DUPq32OddPseudo:
4673  case ARM::VLD1LNq8Pseudo:
4674  case ARM::VLD1LNq16Pseudo:
4675  case ARM::VLD1LNq32Pseudo:
4676  case ARM::VLD1LNq8Pseudo_UPD:
4677  case ARM::VLD1LNq16Pseudo_UPD:
4678  case ARM::VLD1LNq32Pseudo_UPD:
4679  case ARM::VLD2LNd8Pseudo:
4680  case ARM::VLD2LNd16Pseudo:
4681  case ARM::VLD2LNd32Pseudo:
4682  case ARM::VLD2LNq16Pseudo:
4683  case ARM::VLD2LNq32Pseudo:
4684  case ARM::VLD2LNd8Pseudo_UPD:
4685  case ARM::VLD2LNd16Pseudo_UPD:
4686  case ARM::VLD2LNd32Pseudo_UPD:
4687  case ARM::VLD2LNq16Pseudo_UPD:
4688  case ARM::VLD2LNq32Pseudo_UPD:
4689  case ARM::VLD4LNd8Pseudo:
4690  case ARM::VLD4LNd16Pseudo:
4691  case ARM::VLD4LNd32Pseudo:
4692  case ARM::VLD4LNq16Pseudo:
4693  case ARM::VLD4LNq32Pseudo:
4694  case ARM::VLD4LNd8Pseudo_UPD:
4695  case ARM::VLD4LNd16Pseudo_UPD:
4696  case ARM::VLD4LNd32Pseudo_UPD:
4697  case ARM::VLD4LNq16Pseudo_UPD:
4698  case ARM::VLD4LNq32Pseudo_UPD:
4699  // If the address is not 64-bit aligned, the latencies of these
4700  // instructions increase by one.
4701  ++Latency;
4702  break;
4703  }
4704 
4705  return Latency;
4706 }
4707 
4708 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4709  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4710  MI.isImplicitDef())
4711  return 0;
4712 
4713  if (MI.isBundle())
4714  return 0;
4715 
4716  const MCInstrDesc &MCID = MI.getDesc();
4717 
4718  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4719  !Subtarget.cheapPredicableCPSRDef())) {
4720  // When predicated, CPSR is an additional source operand for CPSR updating
4721  // instructions; this apparently increases their latencies.
4722  return 1;
4723  }
4724  return 0;
4725 }
4726 
4727 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4728  const MachineInstr &MI,
4729  unsigned *PredCost) const {
4730  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4731  MI.isImplicitDef())
4732  return 1;
4733 
4734  // An instruction scheduler typically runs on unbundled instructions, however
4735  // other passes may query the latency of a bundled instruction.
4736  if (MI.isBundle()) {
4737  unsigned Latency = 0;
4738  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4739  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4740  while (++I != E && I->isInsideBundle()) {
4741  if (I->getOpcode() != ARM::t2IT)
4742  Latency += getInstrLatency(ItinData, *I, PredCost);
4743  }
4744  return Latency;
4745  }
4746 
4747  const MCInstrDesc &MCID = MI.getDesc();
4748  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4749  !Subtarget.cheapPredicableCPSRDef()))) {
4750  // When predicated, CPSR is an additional source operand for CPSR updating
4751  // instructions; this apparently increases their latencies.
4752  *PredCost = 1;
4753  }
4754  // Be sure to call getStageLatency for an empty itinerary in case it has a
4755  // valid MinLatency property.
4756  if (!ItinData)
4757  return MI.mayLoad() ? 3 : 1;
4758 
4759  unsigned Class = MCID.getSchedClass();
4760 
4761  // For instructions with variable uops, use uops as latency.
4762  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4763  return getNumMicroOps(ItinData, MI);
4764 
4765  // For the common case, fall back on the itinerary's latency.
4766  unsigned Latency = ItinData->getStageLatency(Class);
4767 
4768  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4769  unsigned DefAlign =
4770  MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4771  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4772  if (Adj >= 0 || (int)Latency > -Adj) {
4773  return Latency + Adj;
4774  }
4775  return Latency;
4776 }
4777 
4778 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4779  SDNode *Node) const {
4780  if (!Node->isMachineOpcode())
4781  return 1;
4782 
4783  if (!ItinData || ItinData->isEmpty())
4784  return 1;
4785 
4786  unsigned Opcode = Node->getMachineOpcode();
4787  switch (Opcode) {
4788  default:
4789  return ItinData->getStageLatency(get(Opcode).getSchedClass());
4790  case ARM::VLDMQIA:
4791  case ARM::VSTMQIA:
4792  return 2;
4793  }
4794 }
4795 
4796 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4797  const MachineRegisterInfo *MRI,
4798  const MachineInstr &DefMI,
4799  unsigned DefIdx,
4800  const MachineInstr &UseMI,
4801  unsigned UseIdx) const {
4802  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4803  unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4804  if (Subtarget.nonpipelinedVFP() &&
4805  (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4806  return true;
4807 
4808  // Hoist VFP / NEON instructions with 4 or higher latency.
4809  unsigned Latency =
4810  SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4811  if (Latency <= 3)
4812  return false;
4813  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4814  UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4815 }
4816 
4817 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4818  const MachineInstr &DefMI,
4819  unsigned DefIdx) const {
4820  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4821  if (!ItinData || ItinData->isEmpty())
4822  return false;
4823 
4824  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4825  if (DDomain == ARMII::DomainGeneral) {
4826  unsigned DefClass = DefMI.getDesc().getSchedClass();
4827  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4828  return (DefCycle != -1 && DefCycle <= 2);
4829  }
4830  return false;
4831 }
4832 
4833 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4834  StringRef &ErrInfo) const {
4835  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4836  ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4837  return false;
4838  }
4839  if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4840  // Make sure we don't generate a lo-lo mov that isn't supported.
4841  if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4842  !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4843  ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4844  return false;
4845  }
4846  }
4847  if (MI.getOpcode() == ARM::tPUSH ||
4848  MI.getOpcode() == ARM::tPOP ||
4849  MI.getOpcode() == ARM::tPOP_RET) {
4850  for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4851  if (MO.isImplicit() || !MO.isReg())
4852  continue;
4853  Register Reg = MO.getReg();
4854  if (Reg < ARM::R0 || Reg > ARM::R7) {
4855  if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4856  !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4857  ErrInfo = "Unsupported register in Thumb1 push/pop";
4858  return false;
4859  }
4860  }
4861  }
4862  }
4863  if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4864  assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4865  if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4866  MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4867  ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4868  return false;
4869  }
4870  }
4871 
4872 // Check the addressing mode by taking the first Imm operand and checking that
4873 // it is legal for that mode.
4874  ARMII::AddrMode AddrMode =
4875  (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4876  switch (AddrMode) {
4877  default:
4878  break;
4879  case ARMII::AddrModeT2_i7:
4880  case ARMII::AddrModeT2_i7s2:
4881  case ARMII::AddrModeT2_i7s4:
4882  case ARMII::AddrModeT2_i8:
4883  case ARMII::AddrModeT2_i8pos:
4884  case ARMII::AddrModeT2_i8neg:
4885  case ARMII::AddrModeT2_i8s4:
4886  case ARMII::AddrModeT2_i12: {
4887  uint32_t Imm = 0;
4888  for (auto Op : MI.operands()) {
4889  if (Op.isImm()) {
4890  Imm = Op.getImm();
4891  break;
4892  }
4893  }
4894  if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4895  ErrInfo = "Incorrect AddrMode Imm for instruction";
4896  return false;
4897  }
4898  break;
4899  }
4900  }
4901  return true;
4902 }
4903 
4904 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4905  unsigned LoadImmOpc,
4906  unsigned LoadOpc) const {
4907  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4908  "ROPI/RWPI not currently supported with stack guard");
4909 
4910  MachineBasicBlock &MBB = *MI->getParent();
4911  DebugLoc DL = MI->getDebugLoc();
4912  Register Reg = MI->getOperand(0).getReg();
4913  MachineInstrBuilder MIB;
4914  unsigned int Offset = 0;
4915 
4916  if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4917  assert(Subtarget.isReadTPHard() &&
4918  "TLS stack protector requires hardware TLS register");
4919 
4920  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4921  .addImm(15)
4922  .addImm(0)
4923  .addImm(13)
4924  .addImm(0)
4925  .addImm(3)
4926  .add(predOps(ARMCC::AL));
4927 
4928  Module &M = *MBB.getParent()->getFunction().getParent();
4929  Offset = M.getStackProtectorGuardOffset();
4930  if (Offset & ~0xfffU) {
4931  // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4932  // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4933  // bits, resulting in a range of 0 to +1 MiB for the guard offset.
4934  unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4935  BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4936  .addReg(Reg)
4937  .addImm(Offset & ~0xfffU)
4938  .add(predOps(ARMCC::AL))
4939  .addReg(0);
4940  Offset &= 0xfffU;
4941  }
4942  } else {
4943  const GlobalValue *GV =
4944  cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4945  bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4946 
4947  unsigned TargetFlags = ARMII::MO_NO_FLAG;
4948  if (Subtarget.isTargetMachO()) {
4949  TargetFlags |= ARMII::MO_NONLAZY;
4950  } else if (Subtarget.isTargetCOFF()) {
4951  if (GV->hasDLLImportStorageClass())
4952  TargetFlags |= ARMII::MO_DLLIMPORT;
4953  else if (IsIndirect)
4954  TargetFlags |= ARMII::MO_COFFSTUB;
4955  } else if (Subtarget.isGVInGOT(GV)) {
4956  TargetFlags |= ARMII::MO_GOT;
4957  }
4958 
4959  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4960  .addGlobalAddress(GV, 0, TargetFlags);
4961 
4962  if (IsIndirect) {
4963  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4964  MIB.addReg(Reg, RegState::Kill).addImm(0);
4965  auto Flags = MachineMemOperand::MOLoad |
4966  MachineMemOperand::MODereferenceable |
4967  MachineMemOperand::MOInvariant;
4968  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4969  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4970  MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4971  }
4972  }
4973 
4974  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4975  MIB.addReg(Reg, RegState::Kill)
4976  .addImm(Offset)
4977  .cloneMemRefs(*MI)
4978  .add(predOps(ARMCC::AL));
4979 }
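// Roughly, the sequence emitted above for the hardware-TLS case is (register
// names illustrative only):
//   MRC p15, #0, Rd, c13, c0, #3       ; read the TPIDRURO thread register
//   ADD Rd, Rd, #(Offset & ~0xfff)     ; only when the offset needs > 12 bits
//   LDR Rd, [Rd, #(Offset & 0xfff)]    ; load the stack guard value
// For the global-variable case, the guard's address is materialized (possibly
// via a GOT or non-lazy-pointer load) and the same final LDR is emitted.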
4980 
4981 bool
4982 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4983  unsigned &AddSubOpc,
4984  bool &NegAcc, bool &HasLane) const {
4985  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4986  if (I == MLxEntryMap.end())
4987  return false;
4988 
4989  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4990  MulOpc = Entry.MulOpc;
4991  AddSubOpc = Entry.AddSubOpc;
4992  NegAcc = Entry.NegAcc;
4993  HasLane = Entry.HasLane;
4994  return true;
4995 }
4996 
4997 //===----------------------------------------------------------------------===//
4998 // Execution domains.
4999 //===----------------------------------------------------------------------===//
5000 //
5001 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
5002 // and some can go down both. The vmov instructions go down the VFP pipeline,
5003 // but they can be changed to vorr equivalents that are executed by the NEON
5004 // pipeline.
5005 //
5006 // We use the following execution domain numbering:
5007 //
5008 enum ARMExeDomain {
5009  ExeGeneric = 0,
5010  ExeVFP = 1,
5011  ExeNEON = 2
5012 };
5013 
5014 //
5015 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
5016 //
5017 std::pair<uint16_t, uint16_t>
5018 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
5019  // If we don't have access to NEON instructions then we won't be able
5020  // to swizzle anything to the NEON domain. Check to make sure.
5021  if (Subtarget.hasNEON()) {
5022  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
5023  // if they are not predicated.
5024  if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
5025  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5026 
5027  // CortexA9 is particularly picky about mixing the two and wants these
5028  // converted.
5029  if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
5030  (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
5031  MI.getOpcode() == ARM::VMOVS))
5032  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5033  }
5034  // No other instructions can be swizzled, so just determine their domain.
5035  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
5036 
5037  if (Domain & ARMII::DomainNEON)
5038  return std::make_pair(ExeNEON, 0);
5039 
5040  // Certain instructions can go either way on Cortex-A8.
5041  // Treat them as NEON instructions.
5042  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
5043  return std::make_pair(ExeNEON, 0);
5044 
5045  if (Domain & ARMII::DomainVFP)
5046  return std::make_pair(ExeVFP, 0);
5047 
5048  return std::make_pair(ExeGeneric, 0);
5049 }
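// For example, an unpredicated VMOVD reports (ExeVFP, VFP|NEON bits): it
// currently issues in the VFP domain but may be swizzled to NEON, whereas a
// predicated VMOVD simply reports the domain from its TSFlags with an empty
// "possible domains" mask.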
5050 
5051 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
5052  unsigned SReg, unsigned &Lane) {
5053  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
5054  Lane = 0;
5055 
5056  if (DReg != ARM::NoRegister)
5057  return DReg;
5058 
5059  Lane = 1;
5060  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
5061 
5062  assert(DReg && "S-register with no D super-register?");
5063  return DReg;
5064 }
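// E.g. SReg == S5 yields D2 with Lane == 1, since S5 is the ssub_1 half of
// D2; S4 yields the same D register with Lane == 0.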
5065 
5066 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
5067 /// set ImplicitSReg to a register number that must be marked as implicit-use or
5068 /// zero if no register needs to be marked as implicit-use.
5069 ///
5070 /// If the function cannot determine if an SPR should be marked implicit use or
5071 /// not, it returns false.
5072 ///
5073 /// This function handles cases where an instruction is being modified from taking
5074 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
5075 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
5076 /// lane of the DPR).
5077 ///
5078 /// If the other SPR is defined, an implicit-use of it should be added. Else,
5079 /// (including the case where the DPR itself is defined), it should not.
5080 ///
5081 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
5082  MachineInstr &MI, unsigned DReg,
5083  unsigned Lane, unsigne