LLVM  13.0.0git
ARMBaseInstrInfo.cpp
1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the Base ARM implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMBaseInstrInfo.h"
14 #include "ARMBaseRegisterInfo.h"
15 #include "ARMConstantPoolValue.h"
16 #include "ARMFeatures.h"
17 #include "ARMHazardRecognizer.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
22 #include "MVETailPredUtils.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Triple.h"
46 #include "llvm/IR/Attributes.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DebugLoc.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/MC/MCAsmInfo.h"
52 #include "llvm/MC/MCInstrDesc.h"
55 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/Compiler.h"
58 #include "llvm/Support/Debug.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <iterator>
66 #include <new>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "arm-instrinfo"
73 
74 #define GET_INSTRINFO_CTOR_DTOR
75 #include "ARMGenInstrInfo.inc"
76 
77 static cl::opt<bool>
78 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
79  cl::desc("Enable ARM 2-addr to 3-addr conv"));
80 
81 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
82 struct ARM_MLxEntry {
83  uint16_t MLxOpc; // MLA / MLS opcode
84  uint16_t MulOpc; // Expanded multiplication opcode
85  uint16_t AddSubOpc; // Expanded add / sub opcode
86  bool NegAcc; // True if the acc is negated before the add / sub.
87  bool HasLane; // True if instruction has an extra "lane" operand.
88 };
89 
90 static const ARM_MLxEntry ARM_MLxTable[] = {
91  // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
92  // fp scalar ops
93  { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
94  { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
95  { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
96  { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
97  { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
98  { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
99  { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
100  { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
101 
102  // fp SIMD ops
103  { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
104  { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
105  { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
106  { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
107  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
108  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
109  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
110  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
111 };
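// Each row pairs an MLx opcode with the separate multiply and add/sub
// opcodes it can be split into when keeping the fused form would create a
// scheduling hazard (used by the MLx expansion code and the FP MLx hazard
// recognizer). As a rough sketch, with operand order simplified:
//   VMLAS Sd, Sa, Sb     ; Sd += Sa * Sb
// can become
//   VMULS St, Sa, Sb
//   VADDS Sd, Sd, St
// where St is a scratch register picked at expansion time.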
112 
113 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
114     : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
115  Subtarget(STI) {
116  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
117  if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
118  llvm_unreachable("Duplicated entries?");
119  MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
120  MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
121  }
122 }
123 
124 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrInfo
125 // currently defaults to no prepass hazard recognizer.
126 ScheduleHazardRecognizer *
127 ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
128                                                const ScheduleDAG *DAG) const {
129  if (usePreRAHazardRecognizer()) {
130  const InstrItineraryData *II =
131  static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
132  return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
133  }
134   return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
135 }
136 
137 // Called during:
138 // - pre-RA scheduling
139 // - post-RA scheduling when FeatureUseMISched is set
140 ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
141     const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
142   MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
143 
144  // We would like to restrict this hazard recognizer to only
145  // post-RA scheduling; we can tell that we're post-RA because we don't
146  // track VRegLiveness.
147  // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
148  // banks banked on bit 2. Assume that TCMs are in use.
149  if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
150  MHR->AddHazardRecognizer(
151  std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
152 
153  // Not inserting ARMHazardRecognizerFPMLx because that would change
154  // legacy behavior
155 
156   auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
157   MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
158  return MHR;
159 }
160 
161 // Called during post-RA scheduling when FeatureUseMISched is not set
162 ScheduleHazardRecognizer *ARMBaseInstrInfo::
163 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
164                                    const ScheduleDAG *DAG) const {
165   MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
166 
167  if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
168  MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
169 
170   auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
171   if (BHR)
172  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
173  return MHR;
174 }
175 
176 MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
177     MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
178   // FIXME: Thumb2 support.
179 
180  if (!EnableARM3Addr)
181  return nullptr;
182 
183  MachineFunction &MF = *MI.getParent()->getParent();
184  uint64_t TSFlags = MI.getDesc().TSFlags;
185  bool isPre = false;
186  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
187  default: return nullptr;
188  case ARMII::IndexModePre:
189  isPre = true;
190  break;
191   case ARMII::IndexModePost:
192     break;
193  }
194 
195  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
196  // operation.
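// For example, a pre-indexed load such as "LDR r0, [r1, #4]!" can become
//   ADD r1, r1, #4
//   LDR r0, [r1]
// while a post-indexed form performs the load from the original base first
// and applies the add/sub afterwards.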
197  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
198  if (MemOpc == 0)
199  return nullptr;
200 
201  MachineInstr *UpdateMI = nullptr;
202  MachineInstr *MemMI = nullptr;
203  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
204  const MCInstrDesc &MCID = MI.getDesc();
205  unsigned NumOps = MCID.getNumOperands();
206  bool isLoad = !MI.mayStore();
207  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
208  const MachineOperand &Base = MI.getOperand(2);
209  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
210  Register WBReg = WB.getReg();
211  Register BaseReg = Base.getReg();
212  Register OffReg = Offset.getReg();
213  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
214  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
215  switch (AddrMode) {
216  default: llvm_unreachable("Unknown indexed op!");
217  case ARMII::AddrMode2: {
218  bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
219  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
220  if (OffReg == 0) {
221  if (ARM_AM::getSOImmVal(Amt) == -1)
222  // Can't encode it in a so_imm operand. This transformation will
223  // add more than 1 instruction. Abandon!
224  return nullptr;
225  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
226  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
227  .addReg(BaseReg)
228  .addImm(Amt)
229  .add(predOps(Pred))
230  .add(condCodeOp());
231  } else if (Amt != 0) {
232     ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
233     unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
234  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
235  get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
236  .addReg(BaseReg)
237  .addReg(OffReg)
238  .addReg(0)
239  .addImm(SOOpc)
240  .add(predOps(Pred))
241  .add(condCodeOp());
242  } else
243  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
244  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
245  .addReg(BaseReg)
246  .addReg(OffReg)
247  .add(predOps(Pred))
248  .add(condCodeOp());
249  break;
250  }
251  case ARMII::AddrMode3 : {
252  bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
253  unsigned Amt = ARM_AM::getAM3Offset(OffImm);
254  if (OffReg == 0)
255  // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
256  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
257  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
258  .addReg(BaseReg)
259  .addImm(Amt)
260  .add(predOps(Pred))
261  .add(condCodeOp());
262  else
263  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
264  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
265  .addReg(BaseReg)
266  .addReg(OffReg)
267  .add(predOps(Pred))
268  .add(condCodeOp());
269  break;
270  }
271  }
272 
273  std::vector<MachineInstr*> NewMIs;
274  if (isPre) {
275  if (isLoad)
276  MemMI =
277  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
278  .addReg(WBReg)
279  .addImm(0)
280  .addImm(Pred);
281  else
282  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
283  .addReg(MI.getOperand(1).getReg())
284  .addReg(WBReg)
285  .addReg(0)
286  .addImm(0)
287  .addImm(Pred);
288  NewMIs.push_back(MemMI);
289  NewMIs.push_back(UpdateMI);
290  } else {
291  if (isLoad)
292  MemMI =
293  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
294  .addReg(BaseReg)
295  .addImm(0)
296  .addImm(Pred);
297  else
298  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
299  .addReg(MI.getOperand(1).getReg())
300  .addReg(BaseReg)
301  .addReg(0)
302  .addImm(0)
303  .addImm(Pred);
304  if (WB.isDead())
305  UpdateMI->getOperand(0).setIsDead();
306  NewMIs.push_back(UpdateMI);
307  NewMIs.push_back(MemMI);
308  }
309 
310  // Transfer LiveVariables states, kill / dead info.
311  if (LV) {
312  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
313  MachineOperand &MO = MI.getOperand(i);
314  if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
315  Register Reg = MO.getReg();
316 
317       LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
318       if (MO.isDef()) {
319  MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
320  if (MO.isDead())
321  LV->addVirtualRegisterDead(Reg, *NewMI);
322  }
323  if (MO.isUse() && MO.isKill()) {
324  for (unsigned j = 0; j < 2; ++j) {
325  // Look at the two new MI's in reverse order.
326  MachineInstr *NewMI = NewMIs[j];
327  if (!NewMI->readsRegister(Reg))
328  continue;
329  LV->addVirtualRegisterKilled(Reg, *NewMI);
330  if (VI.removeKill(MI))
331  VI.Kills.push_back(NewMI);
332  break;
333  }
334  }
335  }
336  }
337  }
338 
339  MachineBasicBlock::iterator MBBI = MI.getIterator();
340  MFI->insert(MBBI, NewMIs[1]);
341  MFI->insert(MBBI, NewMIs[0]);
342  return NewMIs[0];
343 }
344 
345 // Branch analysis.
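// analyzeBranch follows the generic TargetInstrInfo contract: on success it
// returns false and fills in TBB (the taken destination), FBB (the
// fall-through destination of a two-way branch, or null), and Cond (the
// ARMCC condition code immediate plus the flag register operand).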
346 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
347                                      MachineBasicBlock *&TBB,
348                                      MachineBasicBlock *&FBB,
349                                      SmallVectorImpl<MachineOperand> &Cond,
350                                      bool AllowModify) const {
351  TBB = nullptr;
352  FBB = nullptr;
353 
354   MachineBasicBlock::instr_iterator I = MBB.instr_end();
355   if (I == MBB.instr_begin())
356  return false; // Empty blocks are easy.
357  --I;
358 
359  // Walk backwards from the end of the basic block until the branch is
360  // analyzed or we give up.
361  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
362  // Flag to be raised on unanalyzeable instructions. This is useful in cases
363  // where we want to clean up on the end of the basic block before we bail
364  // out.
365  bool CantAnalyze = false;
366 
367  // Skip over DEBUG values, predicated nonterminators and speculation
368  // barrier terminators.
369  while (I->isDebugInstr() || !I->isTerminator() ||
370  isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
371  I->getOpcode() == ARM::t2DoLoopStartTP){
372  if (I == MBB.instr_begin())
373  return false;
374  --I;
375  }
376 
377  if (isIndirectBranchOpcode(I->getOpcode()) ||
378  isJumpTableBranchOpcode(I->getOpcode())) {
379  // Indirect branches and jump tables can't be analyzed, but we still want
380  // to clean up any instructions at the tail of the basic block.
381  CantAnalyze = true;
382  } else if (isUncondBranchOpcode(I->getOpcode())) {
383  TBB = I->getOperand(0).getMBB();
384  } else if (isCondBranchOpcode(I->getOpcode())) {
385  // Bail out if we encounter multiple conditional branches.
386  if (!Cond.empty())
387  return true;
388 
389  assert(!FBB && "FBB should have been null.");
390  FBB = TBB;
391  TBB = I->getOperand(0).getMBB();
392  Cond.push_back(I->getOperand(1));
393  Cond.push_back(I->getOperand(2));
394  } else if (I->isReturn()) {
395  // Returns can't be analyzed, but we should run cleanup.
396  CantAnalyze = true;
397  } else {
398       // We encountered some other unrecognized terminator. Bail out immediately.
399  return true;
400  }
401 
402  // Cleanup code - to be run for unpredicated unconditional branches and
403  // returns.
404  if (!isPredicated(*I) &&
405  (isUncondBranchOpcode(I->getOpcode()) ||
406  isIndirectBranchOpcode(I->getOpcode()) ||
407  isJumpTableBranchOpcode(I->getOpcode()) ||
408  I->isReturn())) {
409       // Forget any previous conditional branch information - it no longer applies.
410  Cond.clear();
411  FBB = nullptr;
412 
413  // If we can modify the function, delete everything below this
414  // unconditional branch.
415  if (AllowModify) {
416  MachineBasicBlock::iterator DI = std::next(I);
417  while (DI != MBB.instr_end()) {
418  MachineInstr &InstToDelete = *DI;
419  ++DI;
420  // Speculation barriers must not be deleted.
421  if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
422  continue;
423  InstToDelete.eraseFromParent();
424  }
425  }
426  }
427 
428  if (CantAnalyze) {
429  // We may not be able to analyze the block, but we could still have
430  // an unconditional branch as the last instruction in the block, which
431  // just branches to layout successor. If this is the case, then just
432  // remove it if we're allowed to make modifications.
433  if (AllowModify && !isPredicated(MBB.back()) &&
434           isUncondBranchOpcode(MBB.back().getOpcode()) &&
435           TBB && MBB.isLayoutSuccessor(TBB))
436  removeBranch(MBB);
437  return true;
438  }
439 
440  if (I == MBB.instr_begin())
441  return false;
442 
443  --I;
444  }
445 
446  // We made it past the terminators without bailing out - we must have
447  // analyzed this branch successfully.
448  return false;
449 }
450 
451 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
452                                         int *BytesRemoved) const {
453  assert(!BytesRemoved && "code size not handled");
454 
455   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
456   if (I == MBB.end())
457  return 0;
458 
459  if (!isUncondBranchOpcode(I->getOpcode()) &&
460  !isCondBranchOpcode(I->getOpcode()))
461  return 0;
462 
463  // Remove the branch.
464  I->eraseFromParent();
465 
466  I = MBB.end();
467 
468  if (I == MBB.begin()) return 1;
469  --I;
470  if (!isCondBranchOpcode(I->getOpcode()))
471  return 1;
472 
473  // Remove the branch.
474  I->eraseFromParent();
475  return 2;
476 }
477 
478 unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
479                                         MachineBasicBlock *TBB,
480                                         MachineBasicBlock *FBB,
481                                         ArrayRef<MachineOperand> Cond,
482                                         const DebugLoc &DL,
483  int *BytesAdded) const {
484  assert(!BytesAdded && "code size not handled");
485   ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
486   int BOpc = !AFI->isThumbFunction()
487  ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
488  int BccOpc = !AFI->isThumbFunction()
489  ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
490  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
491 
492  // Shouldn't be a fall through.
493  assert(TBB && "insertBranch must not be told to insert a fallthrough");
494  assert((Cond.size() == 2 || Cond.size() == 0) &&
495  "ARM branch conditions have two components!");
496 
497  // For conditional branches, we use addOperand to preserve CPSR flags.
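// Cond[0] is the ARMCC::CondCodes value as an immediate and Cond[1] is the
// register holding the flags (CPSR), exactly as produced by analyzeBranch.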
498 
499  if (!FBB) {
500  if (Cond.empty()) { // Unconditional branch?
501  if (isThumb)
502  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
503  else
504  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
505  } else
506  BuildMI(&MBB, DL, get(BccOpc))
507  .addMBB(TBB)
508  .addImm(Cond[0].getImm())
509  .add(Cond[1]);
510  return 1;
511  }
512 
513  // Two-way conditional branch.
514  BuildMI(&MBB, DL, get(BccOpc))
515  .addMBB(TBB)
516  .addImm(Cond[0].getImm())
517  .add(Cond[1]);
518  if (isThumb)
519  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
520  else
521  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
522  return 2;
523 }
524 
525 bool ARMBaseInstrInfo::
526 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
527   ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
528  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
529  return false;
530 }
531 
532 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
533   if (MI.isBundle()) {
534     MachineBasicBlock::const_instr_iterator I = MI.getIterator();
535     MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
536  while (++I != E && I->isInsideBundle()) {
537  int PIdx = I->findFirstPredOperandIdx();
538  if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
539  return true;
540  }
541  return false;
542  }
543 
544  int PIdx = MI.findFirstPredOperandIdx();
545  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
546 }
547 
548 std::string ARMBaseInstrInfo::createMIROperandComment(
549     const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
550  const TargetRegisterInfo *TRI) const {
551 
552  // First, let's see if there is a generic comment for this operand
553  std::string GenericComment =
554       TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
555   if (!GenericComment.empty())
556  return GenericComment;
557 
558  // If not, check if we have an immediate operand.
559  if (Op.getType() != MachineOperand::MO_Immediate)
560  return std::string();
561 
562  // And print its corresponding condition code if the immediate is a
563  // predicate.
564  int FirstPredOp = MI.findFirstPredOperandIdx();
565  if (FirstPredOp != (int) OpIdx)
566  return std::string();
567 
568  std::string CC = "CC::";
569  CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
570  return CC;
571 }
572 
573 bool ARMBaseInstrInfo::PredicateInstruction(
574     MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
575   unsigned Opc = MI.getOpcode();
576  if (isUncondBranchOpcode(Opc)) {
577  MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
578  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
579  .addImm(Pred[0].getImm())
580  .addReg(Pred[1].getReg());
581  return true;
582  }
583 
584  int PIdx = MI.findFirstPredOperandIdx();
585  if (PIdx != -1) {
586  MachineOperand &PMO = MI.getOperand(PIdx);
587  PMO.setImm(Pred[0].getImm());
588  MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
589 
590  // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
591  // IT block. This affects how they are printed.
592  const MCInstrDesc &MCID = MI.getDesc();
593     if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
594       assert(MCID.OpInfo[1].isOptionalDef() && "CPSR def isn't expected operand");
595  assert((MI.getOperand(1).isDead() ||
596  MI.getOperand(1).getReg() != ARM::CPSR) &&
597  "if conversion tried to stop defining used CPSR");
598  MI.getOperand(1).setReg(ARM::NoRegister);
599  }
600 
601  return true;
602  }
603  return false;
604 }
605 
606 bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
607                                          ArrayRef<MachineOperand> Pred2) const {
608  if (Pred1.size() > 2 || Pred2.size() > 2)
609  return false;
610 
611  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
612  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
613  if (CC1 == CC2)
614  return true;
615 
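// A predicate subsumes another when it is true whenever the other is true;
// e.g. HS (unsigned >=) subsumes HI (unsigned >), and LE (signed <=)
// subsumes LT (signed <).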
616  switch (CC1) {
617  default:
618  return false;
619  case ARMCC::AL:
620  return true;
621  case ARMCC::HS:
622  return CC2 == ARMCC::HI;
623  case ARMCC::LS:
624  return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
625  case ARMCC::GE:
626  return CC2 == ARMCC::GT;
627  case ARMCC::LE:
628  return CC2 == ARMCC::LT;
629  }
630 }
631 
632 bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
633                                          std::vector<MachineOperand> &Pred,
634  bool SkipDead) const {
635  bool Found = false;
636  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
637  const MachineOperand &MO = MI.getOperand(i);
638  bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
639  bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
640  if (ClobbersCPSR || IsCPSR) {
641 
642  // Filter out T1 instructions that have a dead CPSR,
643  // allowing IT blocks to be generated containing T1 instructions
644  const MCInstrDesc &MCID = MI.getDesc();
645  if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
646  SkipDead)
647  continue;
648 
649  Pred.push_back(MO);
650  Found = true;
651  }
652  }
653 
654  return Found;
655 }
656 
657 static bool isCPSRDefined(const MachineInstr &MI) {
658   for (const auto &MO : MI.operands())
659  if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
660  return true;
661  return false;
662 }
663 
664 static bool isEligibleForITBlock(const MachineInstr *MI) {
665  switch (MI->getOpcode()) {
666  default: return true;
667  case ARM::tADC: // ADC (register) T1
668  case ARM::tADDi3: // ADD (immediate) T1
669  case ARM::tADDi8: // ADD (immediate) T2
670  case ARM::tADDrr: // ADD (register) T1
671  case ARM::tAND: // AND (register) T1
672  case ARM::tASRri: // ASR (immediate) T1
673  case ARM::tASRrr: // ASR (register) T1
674  case ARM::tBIC: // BIC (register) T1
675  case ARM::tEOR: // EOR (register) T1
676  case ARM::tLSLri: // LSL (immediate) T1
677  case ARM::tLSLrr: // LSL (register) T1
678  case ARM::tLSRri: // LSR (immediate) T1
679  case ARM::tLSRrr: // LSR (register) T1
680  case ARM::tMUL: // MUL T1
681  case ARM::tMVN: // MVN (register) T1
682  case ARM::tORR: // ORR (register) T1
683  case ARM::tROR: // ROR (register) T1
684  case ARM::tRSB: // RSB (immediate) T1
685  case ARM::tSBC: // SBC (register) T1
686  case ARM::tSUBi3: // SUB (immediate) T1
687  case ARM::tSUBi8: // SUB (immediate) T2
688  case ARM::tSUBrr: // SUB (register) T1
689     return !isCPSRDefined(*MI);
690   }
691 }
692 
693 /// isPredicable - Return true if the specified instruction can be predicated.
694 /// By default, this returns true for every instruction with a
695 /// PredicateOperand.
696 bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
697   if (!MI.isPredicable())
698  return false;
699 
700  if (MI.isBundle())
701  return false;
702 
703  if (!isEligibleForITBlock(&MI))
704  return false;
705 
706  const MachineFunction *MF = MI.getParent()->getParent();
707  const ARMFunctionInfo *AFI =
708  MF->getInfo<ARMFunctionInfo>();
709 
710  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
711  // In their ARM encoding, they can't be encoded in a conditional form.
712  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
713  return false;
714 
715  // Make indirect control flow changes unpredicable when SLS mitigation is
716  // enabled.
717  const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
718  if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
719  return false;
720  if (ST.hardenSlsBlr() && isIndirectCall(MI))
721  return false;
722 
723  if (AFI->isThumb2Function()) {
724  if (getSubtarget().restrictIT())
725  return isV8EligibleForIT(&MI);
726  }
727 
728  return true;
729 }
730 
731 namespace llvm {
732 
733 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
734  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
735  const MachineOperand &MO = MI->getOperand(i);
736  if (!MO.isReg() || MO.isUndef() || MO.isUse())
737  continue;
738  if (MO.getReg() != ARM::CPSR)
739  continue;
740  if (!MO.isDead())
741  return false;
742  }
743  // all definitions of CPSR are dead
744  return true;
745 }
746 
747 } // end namespace llvm
748 
749 /// GetInstSize - Return the size of the specified MachineInstr.
750 ///
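// When the MCInstrDesc records a non-zero size it is returned directly; the
// switch below handles pseudo-instructions whose final size is only known to
// the target.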
751 unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
752   const MachineBasicBlock &MBB = *MI.getParent();
753  const MachineFunction *MF = MBB.getParent();
754  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
755 
756  const MCInstrDesc &MCID = MI.getDesc();
757  if (MCID.getSize())
758  return MCID.getSize();
759 
760  switch (MI.getOpcode()) {
761  default:
762  // pseudo-instruction sizes are zero.
763  return 0;
764  case TargetOpcode::BUNDLE:
765  return getInstBundleLength(MI);
766  case ARM::MOVi16_ga_pcrel:
767  case ARM::MOVTi16_ga_pcrel:
768  case ARM::t2MOVi16_ga_pcrel:
769  case ARM::t2MOVTi16_ga_pcrel:
770  return 4;
771  case ARM::MOVi32imm:
772  case ARM::t2MOVi32imm:
773  return 8;
774  case ARM::CONSTPOOL_ENTRY:
775  case ARM::JUMPTABLE_INSTS:
776  case ARM::JUMPTABLE_ADDRS:
777  case ARM::JUMPTABLE_TBB:
778  case ARM::JUMPTABLE_TBH:
779  // If this machine instr is a constant pool entry, its size is recorded as
780  // operand #2.
781  return MI.getOperand(2).getImm();
782  case ARM::Int_eh_sjlj_longjmp:
783  return 16;
784  case ARM::tInt_eh_sjlj_longjmp:
785  return 10;
786  case ARM::tInt_WIN_eh_sjlj_longjmp:
787  return 12;
788  case ARM::Int_eh_sjlj_setjmp:
789  case ARM::Int_eh_sjlj_setjmp_nofp:
790  return 20;
791  case ARM::tInt_eh_sjlj_setjmp:
792  case ARM::t2Int_eh_sjlj_setjmp:
793  case ARM::t2Int_eh_sjlj_setjmp_nofp:
794  return 12;
795  case ARM::SPACE:
796  return MI.getOperand(1).getImm();
797  case ARM::INLINEASM:
798  case ARM::INLINEASM_BR: {
799  // If this machine instr is an inline asm, measure it.
800  unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
801  if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
802  Size = alignTo(Size, 4);
803  return Size;
804  }
805  case ARM::SpeculationBarrierISBDSBEndBB:
806  case ARM::t2SpeculationBarrierISBDSBEndBB:
807  // This gets lowered to 2 4-byte instructions.
808  return 8;
809  case ARM::SpeculationBarrierSBEndBB:
810  case ARM::t2SpeculationBarrierSBEndBB:
811   // This gets lowered to 1 4-byte instruction.
812  return 4;
813  }
814 }
815 
816 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
817  unsigned Size = 0;
818   MachineBasicBlock::const_instr_iterator I = MI.getIterator();
819   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
820  while (++I != E && I->isInsideBundle()) {
821  assert(!I->isBundle() && "No nested bundle!");
822     Size += getInstSizeInBytes(*I);
823   }
824  return Size;
825 }
826 
827 void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
828                                     MachineBasicBlock::iterator I,
829                                     unsigned DestReg, bool KillSrc,
830  const ARMSubtarget &Subtarget) const {
831  unsigned Opc = Subtarget.isThumb()
832  ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
833  : ARM::MRS;
834 
835  MachineInstrBuilder MIB =
836  BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
837 
838  // There is only 1 A/R class MRS instruction, and it always refers to
839  // APSR. However, there are lots of other possibilities on M-class cores.
840  if (Subtarget.isMClass())
841  MIB.addImm(0x800);
842 
843  MIB.add(predOps(ARMCC::AL))
844  .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
845 }
846 
847 void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
848                                   MachineBasicBlock::iterator I,
849                                   unsigned SrcReg, bool KillSrc,
850  const ARMSubtarget &Subtarget) const {
851  unsigned Opc = Subtarget.isThumb()
852  ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
853  : ARM::MSR;
854 
855  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
856 
857  if (Subtarget.isMClass())
858  MIB.addImm(0x800);
859  else
860  MIB.addImm(8);
861 
862  MIB.addReg(SrcReg, getKillRegState(KillSrc))
863       .add(predOps(ARMCC::AL))
864       .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
865 }
866 
867 void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
868   MIB.addImm(ARMVCC::None);
869  MIB.addReg(0);
870 }
871 
872 void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
873                                       Register DestReg) {
874   addUnpredicatedMveVpredNOp(MIB);
875   MIB.addReg(DestReg, RegState::Undef);
876 }
877 
878 void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
879   MIB.addImm(Cond);
880  MIB.addReg(ARM::VPR, RegState::Implicit);
881 }
882 
883 void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
884                                     unsigned Cond, unsigned Inactive) {
885   addPredicatedMveVpredNOp(MIB, Cond);
886   MIB.addReg(Inactive);
887 }
888 
889 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
890                                    MachineBasicBlock::iterator I,
891                                    const DebugLoc &DL, MCRegister DestReg,
892  MCRegister SrcReg, bool KillSrc) const {
893  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
894  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
895 
896  if (GPRDest && GPRSrc) {
897  BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
898  .addReg(SrcReg, getKillRegState(KillSrc))
899         .add(predOps(ARMCC::AL))
900         .add(condCodeOp());
901  return;
902  }
903 
904  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
905  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
906 
907  unsigned Opc = 0;
908  if (SPRDest && SPRSrc)
909  Opc = ARM::VMOVS;
910  else if (GPRDest && SPRSrc)
911  Opc = ARM::VMOVRS;
912  else if (SPRDest && GPRSrc)
913  Opc = ARM::VMOVSR;
914  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
915  Opc = ARM::VMOVD;
916  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
917  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
918 
919  if (Opc) {
920  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
921  MIB.addReg(SrcReg, getKillRegState(KillSrc));
922  if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
923  MIB.addReg(SrcReg, getKillRegState(KillSrc));
924  if (Opc == ARM::MVE_VORR)
925  addUnpredicatedMveVpredROp(MIB, DestReg);
926  else
927  MIB.add(predOps(ARMCC::AL));
928  return;
929  }
930 
931  // Handle register classes that require multiple instructions.
932  unsigned BeginIdx = 0;
933  unsigned SubRegs = 0;
934  int Spacing = 1;
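// BeginIdx is the first sub-register index to copy, SubRegs the number of
// sub-register moves to emit, and Spacing the stride between consecutive
// sub-register indices (2 for the "spaced" D-register tuple classes).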
935 
936  // Use VORRq when possible.
937  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
938  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
939  BeginIdx = ARM::qsub_0;
940  SubRegs = 2;
941  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
942  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
943  BeginIdx = ARM::qsub_0;
944  SubRegs = 4;
945  // Fall back to VMOVD.
946  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
947  Opc = ARM::VMOVD;
948  BeginIdx = ARM::dsub_0;
949  SubRegs = 2;
950  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
951  Opc = ARM::VMOVD;
952  BeginIdx = ARM::dsub_0;
953  SubRegs = 3;
954  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
955  Opc = ARM::VMOVD;
956  BeginIdx = ARM::dsub_0;
957  SubRegs = 4;
958  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
959  Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
960  BeginIdx = ARM::gsub_0;
961  SubRegs = 2;
962  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
963  Opc = ARM::VMOVD;
964  BeginIdx = ARM::dsub_0;
965  SubRegs = 2;
966  Spacing = 2;
967  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
968  Opc = ARM::VMOVD;
969  BeginIdx = ARM::dsub_0;
970  SubRegs = 3;
971  Spacing = 2;
972  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
973  Opc = ARM::VMOVD;
974  BeginIdx = ARM::dsub_0;
975  SubRegs = 4;
976  Spacing = 2;
977  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
978  !Subtarget.hasFP64()) {
979  Opc = ARM::VMOVS;
980  BeginIdx = ARM::ssub_0;
981  SubRegs = 2;
982  } else if (SrcReg == ARM::CPSR) {
983  copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
984  return;
985  } else if (DestReg == ARM::CPSR) {
986  copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
987  return;
988  } else if (DestReg == ARM::VPR) {
989  assert(ARM::GPRRegClass.contains(SrcReg));
990  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
991  .addReg(SrcReg, getKillRegState(KillSrc))
992  .add(predOps(ARMCC::AL));
993  return;
994  } else if (SrcReg == ARM::VPR) {
995  assert(ARM::GPRRegClass.contains(DestReg));
996  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
997  .addReg(SrcReg, getKillRegState(KillSrc))
998  .add(predOps(ARMCC::AL));
999  return;
1000  } else if (DestReg == ARM::FPSCR_NZCV) {
1001  assert(ARM::GPRRegClass.contains(SrcReg));
1002  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
1003  .addReg(SrcReg, getKillRegState(KillSrc))
1004  .add(predOps(ARMCC::AL));
1005  return;
1006  } else if (SrcReg == ARM::FPSCR_NZCV) {
1007  assert(ARM::GPRRegClass.contains(DestReg));
1008  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
1009  .addReg(SrcReg, getKillRegState(KillSrc))
1010  .add(predOps(ARMCC::AL));
1011  return;
1012  }
1013 
1014  assert(Opc && "Impossible reg-to-reg copy");
1015 
1016   const TargetRegisterInfo *TRI = &getRegisterInfo();
1017   MachineInstrBuilder Mov;
1018 
1019  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
1020  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
1021  BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
1022  Spacing = -Spacing;
1023  }
1024 #ifndef NDEBUG
1025  SmallSet<unsigned, 4> DstRegs;
1026 #endif
1027  for (unsigned i = 0; i != SubRegs; ++i) {
1028  Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
1029  Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
1030  assert(Dst && Src && "Bad sub-register");
1031 #ifndef NDEBUG
1032  assert(!DstRegs.count(Src) && "destructive vector copy");
1033  DstRegs.insert(Dst);
1034 #endif
1035  Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
1036  // VORR (NEON or MVE) takes two source operands.
1037  if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
1038  Mov.addReg(Src);
1039  }
1040  // MVE VORR takes predicate operands in place of an ordinary condition.
1041  if (Opc == ARM::MVE_VORR)
1042  addUnpredicatedMveVpredROp(Mov, Dst);
1043  else
1044  Mov = Mov.add(predOps(ARMCC::AL));
1045  // MOVr can set CC.
1046  if (Opc == ARM::MOVr)
1047  Mov = Mov.add(condCodeOp());
1048  }
1049  // Add implicit super-register defs and kills to the last instruction.
1050  Mov->addRegisterDefined(DestReg, TRI);
1051  if (KillSrc)
1052  Mov->addRegisterKilled(SrcReg, TRI);
1053 }
1054 
1055 Optional<DestSourcePair>
1056 ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1057   // VMOVRRD is also a copy instruction, but it requires a
1058   // special way of handling. It is a more complex copy version,
1059   // so it is not considered here. For recognition
1060   // of such instructions the isExtractSubregLike MI interface function
1061   // could be used.
1062  // VORRq is considered as a move only if two inputs are
1063  // the same register.
1064  if (!MI.isMoveReg() ||
1065  (MI.getOpcode() == ARM::VORRq &&
1066  MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
1067  return None;
1068  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1069 }
1070 
1071 Optional<ParamLoadedValue>
1072 ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
1073                                       Register Reg) const {
1074  if (auto DstSrcPair = isCopyInstrImpl(MI)) {
1075  Register DstReg = DstSrcPair->Destination->getReg();
1076 
1077  // TODO: We don't handle cases where the forwarding reg is narrower/wider
1078  // than the copy registers. Consider for example:
1079  //
1080  // s16 = VMOVS s0
1081  // s17 = VMOVS s1
1082  // call @callee(d0)
1083  //
1084  // We'd like to describe the call site value of d0 as d8, but this requires
1085  // gathering and merging the descriptions for the two VMOVS instructions.
1086  //
1087  // We also don't handle the reverse situation, where the forwarding reg is
1088  // narrower than the copy destination:
1089  //
1090  // d8 = VMOVD d0
1091  // call @callee(s1)
1092  //
1093  // We need to produce a fragment description (the call site value of s1 is
1094  // /not/ just d8).
1095  if (DstReg != Reg)
1096  return None;
1097  }
1098   return TargetInstrInfo::describeLoadedValue(MI, Reg);
1099 }
1100 
1101 const MachineInstrBuilder &
1102 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
1103                           unsigned SubIdx, unsigned State,
1104  const TargetRegisterInfo *TRI) const {
1105  if (!SubIdx)
1106  return MIB.addReg(Reg, State);
1107 
1108   if (Register::isPhysicalRegister(Reg))
1109     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1110  return MIB.addReg(Reg, State, SubIdx);
1111 }
1112 
1113 void ARMBaseInstrInfo::
1114 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1115                     Register SrcReg, bool isKill, int FI,
1116  const TargetRegisterClass *RC,
1117  const TargetRegisterInfo *TRI) const {
1118  MachineFunction &MF = *MBB.getParent();
1119  MachineFrameInfo &MFI = MF.getFrameInfo();
1120  Align Alignment = MFI.getObjectAlign(FI);
1121 
1122   MachineMemOperand *MMO = MF.getMachineMemOperand(
1123       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
1124       MFI.getObjectSize(FI), Alignment);
1125 
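// Pick a store opcode based on the spill size, in bytes, of the register
// class being saved.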
1126  switch (TRI->getSpillSize(*RC)) {
1127  case 2:
1128  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1129  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
1130  .addReg(SrcReg, getKillRegState(isKill))
1131  .addFrameIndex(FI)
1132  .addImm(0)
1133  .addMemOperand(MMO)
1134  .add(predOps(ARMCC::AL));
1135  } else
1136  llvm_unreachable("Unknown reg class!");
1137  break;
1138  case 4:
1139  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1140  BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1141  .addReg(SrcReg, getKillRegState(isKill))
1142  .addFrameIndex(FI)
1143  .addImm(0)
1144  .addMemOperand(MMO)
1145  .add(predOps(ARMCC::AL));
1146  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1147  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1148  .addReg(SrcReg, getKillRegState(isKill))
1149  .addFrameIndex(FI)
1150  .addImm(0)
1151  .addMemOperand(MMO)
1152  .add(predOps(ARMCC::AL));
1153  } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1154  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
1155  .addReg(SrcReg, getKillRegState(isKill))
1156  .addFrameIndex(FI)
1157  .addImm(0)
1158  .addMemOperand(MMO)
1159  .add(predOps(ARMCC::AL));
1160  } else
1161  llvm_unreachable("Unknown reg class!");
1162  break;
1163  case 8:
1164  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1165  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1166  .addReg(SrcReg, getKillRegState(isKill))
1167  .addFrameIndex(FI)
1168  .addImm(0)
1169  .addMemOperand(MMO)
1170  .add(predOps(ARMCC::AL));
1171  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1172  if (Subtarget.hasV5TEOps()) {
1173       MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1174       AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1175  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1176  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1177  .add(predOps(ARMCC::AL));
1178  } else {
1179  // Fallback to STM instruction, which has existed since the dawn of
1180  // time.
1181  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1182  .addFrameIndex(FI)
1183  .addMemOperand(MMO)
1184  .add(predOps(ARMCC::AL));
1185  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1186  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1187  }
1188  } else
1189  llvm_unreachable("Unknown reg class!");
1190  break;
1191  case 16:
1192  if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1193  // Use aligned spills if the stack can be realigned.
1194  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1195  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1196  .addFrameIndex(FI)
1197  .addImm(16)
1198  .addReg(SrcReg, getKillRegState(isKill))
1199  .addMemOperand(MMO)
1200  .add(predOps(ARMCC::AL));
1201  } else {
1202  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1203  .addReg(SrcReg, getKillRegState(isKill))
1204  .addFrameIndex(FI)
1205  .addMemOperand(MMO)
1206  .add(predOps(ARMCC::AL));
1207  }
1208  } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1209  Subtarget.hasMVEIntegerOps()) {
1210  auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1211  MIB.addReg(SrcReg, getKillRegState(isKill))
1212  .addFrameIndex(FI)
1213  .addImm(0)
1214  .addMemOperand(MMO);
1215       addUnpredicatedMveVpredNOp(MIB);
1216     } else
1217  llvm_unreachable("Unknown reg class!");
1218  break;
1219  case 24:
1220  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1221  // Use aligned spills if the stack can be realigned.
1222  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1223  Subtarget.hasNEON()) {
1224  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1225  .addFrameIndex(FI)
1226  .addImm(16)
1227  .addReg(SrcReg, getKillRegState(isKill))
1228  .addMemOperand(MMO)
1229  .add(predOps(ARMCC::AL));
1230  } else {
1231         MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1232                                           get(ARM::VSTMDIA))
1233  .addFrameIndex(FI)
1234  .add(predOps(ARMCC::AL))
1235  .addMemOperand(MMO);
1236  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1237  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1238  AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1239  }
1240  } else
1241  llvm_unreachable("Unknown reg class!");
1242  break;
1243  case 32:
1244  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1245  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1246  Subtarget.hasNEON()) {
1247  // FIXME: It's possible to only store part of the QQ register if the
1248  // spilled def has a sub-register index.
1249  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1250  .addFrameIndex(FI)
1251  .addImm(16)
1252  .addReg(SrcReg, getKillRegState(isKill))
1253  .addMemOperand(MMO)
1254  .add(predOps(ARMCC::AL));
1255  } else {
1256         MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1257                                           get(ARM::VSTMDIA))
1258  .addFrameIndex(FI)
1259  .add(predOps(ARMCC::AL))
1260  .addMemOperand(MMO);
1261  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1262  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1263  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1264  AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1265  }
1266  } else
1267  llvm_unreachable("Unknown reg class!");
1268  break;
1269  case 64:
1270  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1271  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1272  .addFrameIndex(FI)
1273  .add(predOps(ARMCC::AL))
1274  .addMemOperand(MMO);
1275  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1276  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1277  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1278  MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1279  MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1280  MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1281  MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1282  AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1283  } else
1284  llvm_unreachable("Unknown reg class!");
1285  break;
1286  default:
1287  llvm_unreachable("Unknown reg class!");
1288  }
1289 }
1290 
1291 unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1292                                               int &FrameIndex) const {
1293  switch (MI.getOpcode()) {
1294  default: break;
1295  case ARM::STRrs:
1296  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1297  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1298  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1299  MI.getOperand(3).getImm() == 0) {
1300  FrameIndex = MI.getOperand(1).getIndex();
1301  return MI.getOperand(0).getReg();
1302  }
1303  break;
1304  case ARM::STRi12:
1305  case ARM::t2STRi12:
1306  case ARM::tSTRspi:
1307  case ARM::VSTRD:
1308  case ARM::VSTRS:
1309  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1310  MI.getOperand(2).getImm() == 0) {
1311  FrameIndex = MI.getOperand(1).getIndex();
1312  return MI.getOperand(0).getReg();
1313  }
1314  break;
1315  case ARM::VSTR_P0_off:
1316  if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
1317  MI.getOperand(1).getImm() == 0) {
1318  FrameIndex = MI.getOperand(0).getIndex();
1319  return ARM::P0;
1320  }
1321  break;
1322  case ARM::VST1q64:
1323  case ARM::VST1d64TPseudo:
1324  case ARM::VST1d64QPseudo:
1325  if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1326  FrameIndex = MI.getOperand(0).getIndex();
1327  return MI.getOperand(2).getReg();
1328  }
1329  break;
1330  case ARM::VSTMQIA:
1331  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1332  FrameIndex = MI.getOperand(1).getIndex();
1333  return MI.getOperand(0).getReg();
1334  }
1335  break;
1336  }
1337 
1338  return 0;
1339 }
1340 
1341 bool ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1342                                                 int &FrameIndex) const {
1343   SmallVector<const MachineMemOperand *, 1> Accesses;
1344   if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1345  Accesses.size() == 1) {
1346  FrameIndex =
1347  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1348  ->getFrameIndex();
1349  return true;
1350  }
1351  return false;
1352 }
1353 
1354 void ARMBaseInstrInfo::
1355 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1356                      Register DestReg, int FI,
1357  const TargetRegisterClass *RC,
1358  const TargetRegisterInfo *TRI) const {
1359  DebugLoc DL;
1360  if (I != MBB.end()) DL = I->getDebugLoc();
1361  MachineFunction &MF = *MBB.getParent();
1362  MachineFrameInfo &MFI = MF.getFrameInfo();
1363  const Align Alignment = MFI.getObjectAlign(FI);
1364   MachineMemOperand *MMO = MF.getMachineMemOperand(
1365       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1366       MFI.getObjectSize(FI), Alignment);
1367 
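// Mirror storeRegToStackSlot: pick a reload opcode based on the spill size
// of the register class.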
1368  switch (TRI->getSpillSize(*RC)) {
1369  case 2:
1370  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1371  BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1372  .addFrameIndex(FI)
1373  .addImm(0)
1374  .addMemOperand(MMO)
1375  .add(predOps(ARMCC::AL));
1376  } else
1377  llvm_unreachable("Unknown reg class!");
1378  break;
1379  case 4:
1380  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1381  BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1382  .addFrameIndex(FI)
1383  .addImm(0)
1384  .addMemOperand(MMO)
1385  .add(predOps(ARMCC::AL));
1386  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1387  BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1388  .addFrameIndex(FI)
1389  .addImm(0)
1390  .addMemOperand(MMO)
1391  .add(predOps(ARMCC::AL));
1392  } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1393  BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1394  .addFrameIndex(FI)
1395  .addImm(0)
1396  .addMemOperand(MMO)
1397  .add(predOps(ARMCC::AL));
1398  } else
1399  llvm_unreachable("Unknown reg class!");
1400  break;
1401  case 8:
1402  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1403  BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1404  .addFrameIndex(FI)
1405  .addImm(0)
1406  .addMemOperand(MMO)
1407  .add(predOps(ARMCC::AL));
1408  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1409  MachineInstrBuilder MIB;
1410 
1411  if (Subtarget.hasV5TEOps()) {
1412  MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1413  AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1414  AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1415  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1416  .add(predOps(ARMCC::AL));
1417  } else {
1418  // Fallback to LDM instruction, which has existed since the dawn of
1419  // time.
1420  MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1421  .addFrameIndex(FI)
1422  .addMemOperand(MMO)
1423  .add(predOps(ARMCC::AL));
1424  MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1425  MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1426  }
1427 
1428  if (Register::isPhysicalRegister(DestReg))
1429  MIB.addReg(DestReg, RegState::ImplicitDefine);
1430  } else
1431  llvm_unreachable("Unknown reg class!");
1432  break;
1433  case 16:
1434  if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1435  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1436  BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1437  .addFrameIndex(FI)
1438  .addImm(16)
1439  .addMemOperand(MMO)
1440  .add(predOps(ARMCC::AL));
1441  } else {
1442  BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1443  .addFrameIndex(FI)
1444  .addMemOperand(MMO)
1445  .add(predOps(ARMCC::AL));
1446  }
1447  } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1448  Subtarget.hasMVEIntegerOps()) {
1449  auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1450  MIB.addFrameIndex(FI)
1451  .addImm(0)
1452  .addMemOperand(MMO);
1453       addUnpredicatedMveVpredNOp(MIB);
1454     } else
1455  llvm_unreachable("Unknown reg class!");
1456  break;
1457  case 24:
1458  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1459  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1460  Subtarget.hasNEON()) {
1461  BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1462  .addFrameIndex(FI)
1463  .addImm(16)
1464  .addMemOperand(MMO)
1465  .add(predOps(ARMCC::AL));
1466  } else {
1467  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1468  .addFrameIndex(FI)
1469  .addMemOperand(MMO)
1470  .add(predOps(ARMCC::AL));
1471  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1472  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1473  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1474  if (Register::isPhysicalRegister(DestReg))
1475  MIB.addReg(DestReg, RegState::ImplicitDefine);
1476  }
1477  } else
1478  llvm_unreachable("Unknown reg class!");
1479  break;
1480  case 32:
1481  if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1482  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1483  Subtarget.hasNEON()) {
1484  BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1485  .addFrameIndex(FI)
1486  .addImm(16)
1487  .addMemOperand(MMO)
1488  .add(predOps(ARMCC::AL));
1489  } else {
1490  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1491  .addFrameIndex(FI)
1492  .add(predOps(ARMCC::AL))
1493  .addMemOperand(MMO);
1494  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1495  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1496  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1497  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1498  if (Register::isPhysicalRegister(DestReg))
1499  MIB.addReg(DestReg, RegState::ImplicitDefine);
1500  }
1501  } else
1502  llvm_unreachable("Unknown reg class!");
1503  break;
1504  case 64:
1505  if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1506  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1507  .addFrameIndex(FI)
1508  .add(predOps(ARMCC::AL))
1509  .addMemOperand(MMO);
1510  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1511  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1512  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1513  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1514  MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1515  MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1516  MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1517  MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1518  if (Register::isPhysicalRegister(DestReg))
1519  MIB.addReg(DestReg, RegState::ImplicitDefine);
1520  } else
1521  llvm_unreachable("Unknown reg class!");
1522  break;
1523  default:
1524  llvm_unreachable("Unknown regclass!");
1525  }
1526 }
1527 
1528 unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1529                                                int &FrameIndex) const {
1530  switch (MI.getOpcode()) {
1531  default: break;
1532  case ARM::LDRrs:
1533  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1534  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1535  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1536  MI.getOperand(3).getImm() == 0) {
1537  FrameIndex = MI.getOperand(1).getIndex();
1538  return MI.getOperand(0).getReg();
1539  }
1540  break;
1541  case ARM::LDRi12:
1542  case ARM::t2LDRi12:
1543  case ARM::tLDRspi:
1544  case ARM::VLDRD:
1545  case ARM::VLDRS:
1546  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1547  MI.getOperand(2).getImm() == 0) {
1548  FrameIndex = MI.getOperand(1).getIndex();
1549  return MI.getOperand(0).getReg();
1550  }
1551  break;
1552  case ARM::VLDR_P0_off:
1553  if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
1554  MI.getOperand(1).getImm() == 0) {
1555  FrameIndex = MI.getOperand(0).getIndex();
1556  return ARM::P0;
1557  }
1558  break;
1559  case ARM::VLD1q64:
1560  case ARM::VLD1d8TPseudo:
1561  case ARM::VLD1d16TPseudo:
1562  case ARM::VLD1d32TPseudo:
1563  case ARM::VLD1d64TPseudo:
1564  case ARM::VLD1d8QPseudo:
1565  case ARM::VLD1d16QPseudo:
1566  case ARM::VLD1d32QPseudo:
1567  case ARM::VLD1d64QPseudo:
1568  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1569  FrameIndex = MI.getOperand(1).getIndex();
1570  return MI.getOperand(0).getReg();
1571  }
1572  break;
1573  case ARM::VLDMQIA:
1574  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1575  FrameIndex = MI.getOperand(1).getIndex();
1576  return MI.getOperand(0).getReg();
1577  }
1578  break;
1579  }
1580 
1581  return 0;
1582 }
1583 
1584 bool ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1585                                                  int &FrameIndex) const {
1586   SmallVector<const MachineMemOperand *, 1> Accesses;
1587   if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1588  Accesses.size() == 1) {
1589  FrameIndex =
1590  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1591  ->getFrameIndex();
1592  return true;
1593  }
1594  return false;
1595 }
1596 
1597 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1598 /// depending on whether the result is used.
1599 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1600  bool isThumb1 = Subtarget.isThumb1Only();
1601  bool isThumb2 = Subtarget.isThumb2();
1602  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1603 
1604  DebugLoc dl = MI->getDebugLoc();
1605  MachineBasicBlock *BB = MI->getParent();
1606 
1607  MachineInstrBuilder LDM, STM;
1608  if (isThumb1 || !MI->getOperand(1).isDead()) {
1609  MachineOperand LDWb(MI->getOperand(1));
1610  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1611  : isThumb1 ? ARM::tLDMIA_UPD
1612  : ARM::LDMIA_UPD))
1613  .add(LDWb);
1614  } else {
1615  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1616  }
1617 
1618  if (isThumb1 || !MI->getOperand(0).isDead()) {
1619  MachineOperand STWb(MI->getOperand(0));
1620  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1621  : isThumb1 ? ARM::tSTMIA_UPD
1622  : ARM::STMIA_UPD))
1623  .add(STWb);
1624  } else {
1625  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1626  }
1627 
1628  MachineOperand LDBase(MI->getOperand(3));
1629  LDM.add(LDBase).add(predOps(ARMCC::AL));
1630 
1631  MachineOperand STBase(MI->getOperand(2));
1632  STM.add(STBase).add(predOps(ARMCC::AL));
1633 
1634  // Sort the scratch registers into ascending order.
1635   const TargetRegisterInfo &TRI = getRegisterInfo();
1636   SmallVector<unsigned, 6> ScratchRegs;
1637  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
1638  ScratchRegs.push_back(MI->getOperand(I).getReg());
1639  llvm::sort(ScratchRegs,
1640  [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1641  return TRI.getEncodingValue(Reg1) <
1642  TRI.getEncodingValue(Reg2);
1643  });
1644 
1645  for (const auto &Reg : ScratchRegs) {
1646  LDM.addReg(Reg, RegState::Define);
1647  STM.addReg(Reg, RegState::Kill);
1648  }
1649 
1650  BB->erase(MI);
1651 }
1652 
1653 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1654   if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1655  assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
1656  "LOAD_STACK_GUARD currently supported only for MachO.");
1657  expandLoadStackGuard(MI);
1658  MI.getParent()->erase(MI);
1659  return true;
1660  }
1661 
1662  if (MI.getOpcode() == ARM::MEMCPY) {
1663  expandMEMCPY(MI);
1664  return true;
1665  }
1666 
1667  // This hook gets to expand COPY instructions before they become
1668  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1669  // widened to VMOVD. We prefer the VMOVD when possible because it may be
1670  // changed into a VORR that can go down the NEON pipeline.
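// For example, "s0 = COPY s2" can be widened to "d0 = VMOVD d1", since s0
// and s2 are the low (ssub_0) halves of d0 and d1, provided the copy already
// defines all of d0.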
1671  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1672  return false;
1673 
1674  // Look for a copy between even S-registers. That is where we keep floats
1675  // when using NEON v2f32 instructions for f32 arithmetic.
1676  Register DstRegS = MI.getOperand(0).getReg();
1677  Register SrcRegS = MI.getOperand(1).getReg();
1678  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1679  return false;
1680 
1681   const TargetRegisterInfo *TRI = &getRegisterInfo();
1682   unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1683  &ARM::DPRRegClass);
1684  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1685  &ARM::DPRRegClass);
1686  if (!DstRegD || !SrcRegD)
1687  return false;
1688 
1689  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1690  // legal if the COPY already defines the full DstRegD, and it isn't a
1691  // sub-register insertion.
1692  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1693  return false;
1694 
1695  // A dead copy shouldn't show up here, but reject it just in case.
1696  if (MI.getOperand(0).isDead())
1697  return false;
1698 
1699  // All clear, widen the COPY.
1700  LLVM_DEBUG(dbgs() << "widening: " << MI);
1701  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1702 
1703  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1704  // or some other super-register.
1705  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1706  if (ImpDefIdx != -1)
1707  MI.RemoveOperand(ImpDefIdx);
1708 
1709  // Change the opcode and operands.
1710  MI.setDesc(get(ARM::VMOVD));
1711  MI.getOperand(0).setReg(DstRegD);
1712  MI.getOperand(1).setReg(SrcRegD);
1713  MIB.add(predOps(ARMCC::AL));
1714 
1715  // We are now reading SrcRegD instead of SrcRegS. This may upset the
1716  // register scavenger and machine verifier, so we need to indicate that we
1717  // are reading an undefined value from SrcRegD, but a proper value from
1718  // SrcRegS.
1719  MI.getOperand(1).setIsUndef();
1720  MIB.addReg(SrcRegS, RegState::Implicit);
1721 
1722  // SrcRegD may actually contain an unrelated value in the ssub_1
1723  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1724  if (MI.getOperand(1).isKill()) {
1725  MI.getOperand(1).setIsKill(false);
1726  MI.addRegisterKilled(SrcRegS, TRI, true);
1727  }
1728 
1729  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1730  return true;
1731 }
1732 
1733 /// Create a copy of a const pool value. Update CPI to the new index and return
1734 /// the label UID.
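// A rematerialized or duplicated PIC constant-pool load cannot share the
// original entry: the new instruction needs its own PC label, so a fresh
// ARMConstantPoolValue is created and CPI is updated to point at it.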
1735 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1736   MachineConstantPool *MCP = MF.getConstantPool();
1737   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1738 
1739  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1740  assert(MCPE.isMachineConstantPoolEntry() &&
1741  "Expecting a machine constantpool entry!");
1742  ARMConstantPoolValue *ACPV =
1743  static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1744 
1745  unsigned PCLabelId = AFI->createPICLabelUId();
1746  ARMConstantPoolValue *NewCPV = nullptr;
1747 
1748  // FIXME: The below assumes PIC relocation model and that the function
1749  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1750  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1751  // instructions, so that's probably OK, but is PIC always correct when
1752  // we get here?
1753  if (ACPV->isGlobalValue())
1754     NewCPV = ARMConstantPoolConstant::Create(
1755         cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1756  4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1757  else if (ACPV->isExtSymbol())
1758  NewCPV = ARMConstantPoolSymbol::
1759  Create(MF.getFunction().getContext(),
1760  cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1761  else if (ACPV->isBlockAddress())
1762  NewCPV = ARMConstantPoolConstant::
1763  Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1764  ARMCP::CPBlockAddress, 4);
1765  else if (ACPV->isLSDA())
1766  NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1767  ARMCP::CPLSDA, 4);
1768  else if (ACPV->isMachineBasicBlock())
1769  NewCPV = ARMConstantPoolMBB::
1770  Create(MF.getFunction().getContext(),
1771  cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1772  else
1773  llvm_unreachable("Unexpected ARM constantpool value type!!");
1774  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1775  return PCLabelId;
1776 }
1777 
1778 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1779  MachineBasicBlock::iterator I,
1780  Register DestReg, unsigned SubIdx,
1781  const MachineInstr &Orig,
1782  const TargetRegisterInfo &TRI) const {
1783  unsigned Opcode = Orig.getOpcode();
1784  switch (Opcode) {
1785  default: {
1786  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1787  MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1788  MBB.insert(I, MI);
1789  break;
1790  }
1791  case ARM::tLDRpci_pic:
1792  case ARM::t2LDRpci_pic: {
1793  MachineFunction &MF = *MBB.getParent();
1794  unsigned CPI = Orig.getOperand(1).getIndex();
1795  unsigned PCLabelId = duplicateCPV(MF, CPI);
1796  BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1797  .addConstantPoolIndex(CPI)
1798  .addImm(PCLabelId)
1799  .cloneMemRefs(Orig);
1800  break;
1801  }
1802  }
1803 }
1804 
1805 MachineInstr &
1806 ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1807  MachineBasicBlock::iterator InsertBefore,
1808  const MachineInstr &Orig) const {
1809  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1810  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1811  for (;;) {
1812  switch (I->getOpcode()) {
1813  case ARM::tLDRpci_pic:
1814  case ARM::t2LDRpci_pic: {
1815  MachineFunction &MF = *MBB.getParent();
1816  unsigned CPI = I->getOperand(1).getIndex();
1817  unsigned PCLabelId = duplicateCPV(MF, CPI);
1818  I->getOperand(1).setIndex(CPI);
1819  I->getOperand(2).setImm(PCLabelId);
1820  break;
1821  }
1822  }
1823  if (!I->isBundledWithSucc())
1824  break;
1825  ++I;
1826  }
1827  return Cloned;
1828 }
1829 
1830 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1831  const MachineInstr &MI1,
1832  const MachineRegisterInfo *MRI) const {
1833  unsigned Opcode = MI0.getOpcode();
1834  if (Opcode == ARM::t2LDRpci ||
1835  Opcode == ARM::t2LDRpci_pic ||
1836  Opcode == ARM::tLDRpci ||
1837  Opcode == ARM::tLDRpci_pic ||
1838  Opcode == ARM::LDRLIT_ga_pcrel ||
1839  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1840  Opcode == ARM::tLDRLIT_ga_pcrel ||
1841  Opcode == ARM::MOV_ga_pcrel ||
1842  Opcode == ARM::MOV_ga_pcrel_ldr ||
1843  Opcode == ARM::t2MOV_ga_pcrel) {
1844  if (MI1.getOpcode() != Opcode)
1845  return false;
1846  if (MI0.getNumOperands() != MI1.getNumOperands())
1847  return false;
1848 
1849  const MachineOperand &MO0 = MI0.getOperand(1);
1850  const MachineOperand &MO1 = MI1.getOperand(1);
1851  if (MO0.getOffset() != MO1.getOffset())
1852  return false;
1853 
1854  if (Opcode == ARM::LDRLIT_ga_pcrel ||
1855  Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1856  Opcode == ARM::tLDRLIT_ga_pcrel ||
1857  Opcode == ARM::MOV_ga_pcrel ||
1858  Opcode == ARM::MOV_ga_pcrel_ldr ||
1859  Opcode == ARM::t2MOV_ga_pcrel)
1860  // Ignore the PC labels.
1861  return MO0.getGlobal() == MO1.getGlobal();
1862 
1863  const MachineFunction *MF = MI0.getParent()->getParent();
1864  const MachineConstantPool *MCP = MF->getConstantPool();
1865  int CPI0 = MO0.getIndex();
1866  int CPI1 = MO1.getIndex();
1867  const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1868  const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1869  bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1870  bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1871  if (isARMCP0 && isARMCP1) {
1872  ARMConstantPoolValue *ACPV0 =
1873  static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1874  ARMConstantPoolValue *ACPV1 =
1875  static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1876  return ACPV0->hasSameValue(ACPV1);
1877  } else if (!isARMCP0 && !isARMCP1) {
1878  return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1879  }
1880  return false;
1881  } else if (Opcode == ARM::PICLDR) {
1882  if (MI1.getOpcode() != Opcode)
1883  return false;
1884  if (MI0.getNumOperands() != MI1.getNumOperands())
1885  return false;
1886 
1887  Register Addr0 = MI0.getOperand(1).getReg();
1888  Register Addr1 = MI1.getOperand(1).getReg();
1889  if (Addr0 != Addr1) {
1890  if (!MRI || !Register::isVirtualRegister(Addr0) ||
1891  !Register::isVirtualRegister(Addr1))
1892  return false;
1893 
1894  // This assumes SSA form.
1895  MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1896  MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1897  // Check if the loaded value, e.g. a constantpool of a global address, are
1898  // the same.
1899  if (!produceSameValue(*Def0, *Def1, MRI))
1900  return false;
1901  }
1902 
1903  for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1904  // %12 = PICLDR %11, 0, 14, %noreg
1905  const MachineOperand &MO0 = MI0.getOperand(i);
1906  const MachineOperand &MO1 = MI1.getOperand(i);
1907  if (!MO0.isIdenticalTo(MO1))
1908  return false;
1909  }
1910  return true;
1911  }
1912 
1913  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1914 }
1915 
1916 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1917 /// determine if two loads are loading from the same base address. It should
1918 /// only return true if the base pointers are the same and the only differences
1919 /// between the two addresses is the offset. It also returns the offsets by
1920 /// reference.
1921 ///
1922 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1923 /// is permanently disabled.
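///
/// Illustrative example (not from the original source): two LDRi12 nodes with
/// the same base and chain operands but immediate offsets 4 and 8 would return
/// true with Offset1 = 4 and Offset2 = 8.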
1924 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1925  int64_t &Offset1,
1926  int64_t &Offset2) const {
1927  // Don't worry about Thumb: just ARM and Thumb2.
1928  if (Subtarget.isThumb1Only()) return false;
1929 
1930  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1931  return false;
1932 
1933  switch (Load1->getMachineOpcode()) {
1934  default:
1935  return false;
1936  case ARM::LDRi12:
1937  case ARM::LDRBi12:
1938  case ARM::LDRD:
1939  case ARM::LDRH:
1940  case ARM::LDRSB:
1941  case ARM::LDRSH:
1942  case ARM::VLDRD:
1943  case ARM::VLDRS:
1944  case ARM::t2LDRi8:
1945  case ARM::t2LDRBi8:
1946  case ARM::t2LDRDi8:
1947  case ARM::t2LDRSHi8:
1948  case ARM::t2LDRi12:
1949  case ARM::t2LDRBi12:
1950  case ARM::t2LDRSHi12:
1951  break;
1952  }
1953 
1954  switch (Load2->getMachineOpcode()) {
1955  default:
1956  return false;
1957  case ARM::LDRi12:
1958  case ARM::LDRBi12:
1959  case ARM::LDRD:
1960  case ARM::LDRH:
1961  case ARM::LDRSB:
1962  case ARM::LDRSH:
1963  case ARM::VLDRD:
1964  case ARM::VLDRS:
1965  case ARM::t2LDRi8:
1966  case ARM::t2LDRBi8:
1967  case ARM::t2LDRSHi8:
1968  case ARM::t2LDRi12:
1969  case ARM::t2LDRBi12:
1970  case ARM::t2LDRSHi12:
1971  break;
1972  }
1973 
1974  // Check if base addresses and chain operands match.
1975  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1976  Load1->getOperand(4) != Load2->getOperand(4))
1977  return false;
1978 
1979  // Index should be Reg0.
1980  if (Load1->getOperand(3) != Load2->getOperand(3))
1981  return false;
1982 
1983  // Determine the offsets.
1984  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1985  isa<ConstantSDNode>(Load2->getOperand(1))) {
1986  Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1987  Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1988  return true;
1989  }
1990 
1991  return false;
1992 }
1993 
1994 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1995 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1996 /// be scheduled together. On some targets, if two loads are loading from
1997 /// addresses in the same cache line, it's better if they are scheduled
1998 /// together. This function takes two integers that represent the load offsets
1999 /// from the common base address. It returns true if it decides it's desirable
2000 /// to schedule the two loads together. "NumLoads" is the number of loads that
2001 /// have already been scheduled after Load1.
2002 ///
2003 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
2004 /// is permanently disabled.
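///
/// Illustrative example (not from the original source): with Offset1 = 0 and
/// Offset2 = 8 the two accesses likely share a cache line, so keeping them
/// adjacent in the schedule is considered worthwhile.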
2005 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
2006  int64_t Offset1, int64_t Offset2,
2007  unsigned NumLoads) const {
2008  // Don't worry about Thumb: just ARM and Thumb2.
2009  if (Subtarget.isThumb1Only()) return false;
2010 
2011  assert(Offset2 > Offset1);
2012 
2013  if ((Offset2 - Offset1) / 8 > 64)
2014  return false;
2015 
2016  // Check if the machine opcodes are different. If they are different
2017  // then we consider them to not be of the same base address,
2018  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
2019  // In this case, they are considered to be the same because they are different
2020  // encoding forms of the same basic instruction.
2021  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
2022  !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
2023  Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
2024  (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
2025  Load2->getMachineOpcode() == ARM::t2LDRBi8)))
2026  return false; // FIXME: overly conservative?
2027 
2028  // Four loads in a row should be sufficient.
2029  if (NumLoads >= 3)
2030  return false;
2031 
2032  return true;
2033 }
2034 
2035 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2036  const MachineBasicBlock *MBB,
2037  const MachineFunction &MF) const {
2038  // Debug info is never a scheduling boundary. It's necessary to be explicit
2039  // due to the special treatment of IT instructions below, otherwise a
2040  // dbg_value followed by an IT will result in the IT instruction being
2041  // considered a scheduling hazard, which is wrong. It should be the actual
2042  // instruction preceding the dbg_value instruction(s), just like it is
2043  // when debug info is not present.
2044  if (MI.isDebugInstr())
2045  return false;
2046 
2047  // Terminators and labels can't be scheduled around.
2048  if (MI.isTerminator() || MI.isPosition())
2049  return true;
2050 
2051  // INLINEASM_BR can jump to another block
2052  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
2053  return true;
2054 
2055  // Treat the start of the IT block as a scheduling boundary, but schedule
2056  // t2IT along with all instructions following it.
2057  // FIXME: This is a big hammer. But the alternative is to add all potential
2058  // true and anti dependencies to IT block instructions as implicit operands
2059  // to the t2IT instruction. The added compile time and complexity does not
2060  // seem worth it.
2061  MachineBasicBlock::const_iterator I = MI;
2062  // Make sure to skip any debug instructions
2063  while (++I != MBB->end() && I->isDebugInstr())
2064  ;
2065  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
2066  return true;
2067 
2068  // Don't attempt to schedule around any instruction that defines
2069  // a stack-oriented pointer, as it's unlikely to be profitable. This
2070  // saves compile time, because it doesn't require every single
2071  // stack slot reference to depend on the instruction that does the
2072  // modification.
2073  // Calls don't actually change the stack pointer, even if they have imp-defs.
2074  // No ARM calling conventions change the stack pointer. (X86 calling
2075  // conventions sometimes do).
2076  if (!MI.isCall() && MI.definesRegister(ARM::SP))
2077  return true;
2078 
2079  return false;
2080 }
2081 
2082 bool ARMBaseInstrInfo::
2083 isProfitableToIfCvt(MachineBasicBlock &MBB,
2084  unsigned NumCycles, unsigned ExtraPredCycles,
2085  BranchProbability Probability) const {
2086  if (!NumCycles)
2087  return false;
2088 
2089  // If we are optimizing for size, see if the branch in the predecessor can be
2090  // lowered to cbn?z by the constant island lowering pass, and return false if
2091  // so. This results in a shorter instruction sequence.
2092  if (MBB.getParent()->getFunction().hasOptSize()) {
2093  MachineBasicBlock *Pred = *MBB.pred_begin();
2094  if (!Pred->empty()) {
2095  MachineInstr *LastMI = &*Pred->rbegin();
2096  if (LastMI->getOpcode() == ARM::t2Bcc) {
2097  const TargetRegisterInfo *TRI = &getRegisterInfo();
2098  MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
2099  if (CmpMI)
2100  return false;
2101  }
2102  }
2103  }
2104  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
2105  MBB, 0, 0, Probability);
2106 }
2107 
2108 bool ARMBaseInstrInfo::
2109 isProfitableToIfCvt(MachineBasicBlock &TBB,
2110  unsigned TCycles, unsigned TExtra,
2111  MachineBasicBlock &FBB,
2112  unsigned FCycles, unsigned FExtra,
2113  BranchProbability Probability) const {
2114  if (!TCycles)
2115  return false;
2116 
2117  // In Thumb code we often end up trading one branch for an IT block, and
2118  // if we are cloning, the extra instructions can increase code size. Prevent
2119  // blocks with multiple predecessors from being if-converted to prevent this
2120  // cloning.
2121  if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
2122  if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
2123  return false;
2124  }
2125 
2126  // Attempt to estimate the relative costs of predication versus branching.
2127  // Here we scale up each component of UnpredCost to avoid precision issues when
2128  // scaling TCycles/FCycles by Probability.
2129  const unsigned ScalingUpFactor = 1024;
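  // Worked example (illustrative, assuming a misprediction penalty of 4): for
  // a triangle with TCycles = 1, FCycles = 0, TExtra = 1 and Probability = 1/2
  // on a core without a branch predictor, PredCost = 2 * 1024 while
  // UnpredCost = 0.5*(1+1)*1024 + 0.5*4*1024 = 3072, so predication wins.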
2130 
2131  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
2132  unsigned UnpredCost;
2133  if (!Subtarget.hasBranchPredictor()) {
2134  // When we don't have a branch predictor it's always cheaper to not take a
2135  // branch than take it, so we have to take that into account.
2136  unsigned NotTakenBranchCost = 1;
2137  unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2138  unsigned TUnpredCycles, FUnpredCycles;
2139  if (!FCycles) {
2140  // Triangle: TBB is the fallthrough
2141  TUnpredCycles = TCycles + NotTakenBranchCost;
2142  FUnpredCycles = TakenBranchCost;
2143  } else {
2144  // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2145  TUnpredCycles = TCycles + TakenBranchCost;
2146  FUnpredCycles = FCycles + NotTakenBranchCost;
2147  // The branch at the end of FBB will disappear when it's predicated, so
2148  // discount it from PredCost.
2149  PredCost -= 1 * ScalingUpFactor;
2150  }
2151  // The total cost is the cost of each path scaled by their probabilities
2152  unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2153  unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2154  UnpredCost = TUnpredCost + FUnpredCost;
2155  // When predicating, assume that the first IT can be folded away but later
2156  // ones cost one cycle each.
2157  if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2158  PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2159  }
2160  } else {
2161  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2162  unsigned FUnpredCost =
2163  Probability.getCompl().scale(FCycles * ScalingUpFactor);
2164  UnpredCost = TUnpredCost + FUnpredCost;
2165  UnpredCost += 1 * ScalingUpFactor; // The branch itself
2166  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2167  }
2168 
2169  return PredCost <= UnpredCost;
2170 }
2171 
2172 unsigned
2173 ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2174  unsigned NumInsts) const {
2175  // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2176  // ARM has a condition code field in every predicable instruction, using it
2177  // doesn't change code size.
2178  if (!Subtarget.isThumb2())
2179  return 0;
2180 
2181  // It's possible that the size of the IT is restricted to a single block.
2182  unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
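  // For example, predicating 6 instructions costs divideCeil(6, 4) * 2 = 4
  // bytes of IT instructions here, or 12 bytes when each IT may only cover a
  // single instruction.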
2183  return divideCeil(NumInsts, MaxInsts) * 2;
2184 }
2185 
2186 unsigned
2187 ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2188  // If this branch is likely to be folded into the comparison to form a
2189  // CB(N)Z, then removing it won't reduce code size at all, because that will
2190  // just replace the CB(N)Z with a CMP.
2191  if (MI.getOpcode() == ARM::t2Bcc &&
2192  findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2193  return 0;
2194 
2195  unsigned Size = getInstSizeInBytes(MI);
2196 
2197  // For Thumb2, all branches are 32-bit instructions during the if conversion
2198  // pass, but may be replaced with 16-bit instructions during size reduction.
2199  // Since the branches considered by if conversion tend to be forward branches
2200  // over small basic blocks, they are very likely to be in range for the
2201  // narrow instructions, so we assume the final code size will be half what it
2202  // currently is.
2203  if (Subtarget.isThumb2())
2204  Size /= 2;
2205 
2206  return Size;
2207 }
2208 
2209 bool
2210 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2211  MachineBasicBlock &FMBB) const {
2212  // Reduce false anti-dependencies to let the target's out-of-order execution
2213  // engine do its thing.
2214  return Subtarget.isProfitableToUnpredicate();
2215 }
2216 
2217 /// getInstrPredicate - If instruction is predicated, returns its predicate
2218 /// condition, otherwise returns AL. It also returns the condition code
2219 /// register by reference.
2220 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2221  Register &PredReg) {
2222  int PIdx = MI.findFirstPredOperandIdx();
2223  if (PIdx == -1) {
2224  PredReg = 0;
2225  return ARMCC::AL;
2226  }
2227 
2228  PredReg = MI.getOperand(PIdx+1).getReg();
2229  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2230 }
2231 
2232 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2233  if (Opc == ARM::B)
2234  return ARM::Bcc;
2235  if (Opc == ARM::tB)
2236  return ARM::tBcc;
2237  if (Opc == ARM::t2B)
2238  return ARM::t2Bcc;
2239 
2240  llvm_unreachable("Unknown unconditional branch opcode!");
2241 }
2242 
2243 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2244  bool NewMI,
2245  unsigned OpIdx1,
2246  unsigned OpIdx2) const {
2247  switch (MI.getOpcode()) {
2248  case ARM::MOVCCr:
2249  case ARM::t2MOVCCr: {
2250  // MOVCC can be commuted by inverting the condition.
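    // Illustrative example (not from the original source):
    //   %d = t2MOVCCr %a, %b, eq, $cpsr
    // commutes to
    //   %d = t2MOVCCr %b, %a, ne, $cpsr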
2251  Register PredReg;
2252  ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2253  // MOVCC AL can't be inverted. Shouldn't happen.
2254  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2255  return nullptr;
2256  MachineInstr *CommutedMI =
2257  TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2258  if (!CommutedMI)
2259  return nullptr;
2260  // After swapping the MOVCC operands, also invert the condition.
2261  CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2262  .setImm(ARMCC::getOppositeCondition(CC));
2263  return CommutedMI;
2264  }
2265  }
2266  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2267 }
2268 
2269 /// Identify instructions that can be folded into a MOVCC instruction, and
2270 /// return the defining instruction.
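///
/// Illustrative example (not from the original source): if %t = t2ADDri %a, 1
/// is only used by a t2MOVCCr select, the ADD can be predicated and the select
/// removed (see optimizeSelect below).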
2271 MachineInstr *
2272 ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2273  const TargetInstrInfo *TII) const {
2274  if (!Reg.isVirtual())
2275  return nullptr;
2276  if (!MRI.hasOneNonDBGUse(Reg))
2277  return nullptr;
2278  MachineInstr *MI = MRI.getVRegDef(Reg);
2279  if (!MI)
2280  return nullptr;
2281  // Check if MI can be predicated and folded into the MOVCC.
2282  if (!isPredicable(*MI))
2283  return nullptr;
2284  // Check if MI has any non-dead defs or physreg uses. This also detects
2285  // predicated instructions which will be reading CPSR.
2286  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
2287  const MachineOperand &MO = MI->getOperand(i);
2288  // Reject frame index operands, PEI can't handle the predicated pseudos.
2289  if (MO.isFI() || MO.isCPI() || MO.isJTI())
2290  return nullptr;
2291  if (!MO.isReg())
2292  continue;
2293  // MI can't have any tied operands, that would conflict with predication.
2294  if (MO.isTied())
2295  return nullptr;
2296  if (Register::isPhysicalRegister(MO.getReg()))
2297  return nullptr;
2298  if (MO.isDef() && !MO.isDead())
2299  return nullptr;
2300  }
2301  bool DontMoveAcrossStores = true;
2302  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2303  return nullptr;
2304  return MI;
2305 }
2306 
2307 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2308  SmallVectorImpl<MachineOperand> &Cond,
2309  unsigned &TrueOp, unsigned &FalseOp,
2310  bool &Optimizable) const {
2311  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2312  "Unknown select instruction");
2313  // MOVCC operands:
2314  // 0: Def.
2315  // 1: True use.
2316  // 2: False use.
2317  // 3: Condition code.
2318  // 4: CPSR use.
2319  TrueOp = 1;
2320  FalseOp = 2;
2321  Cond.push_back(MI.getOperand(3));
2322  Cond.push_back(MI.getOperand(4));
2323  // We can always fold a def.
2324  Optimizable = true;
2325  return false;
2326 }
2327 
2328 MachineInstr *
2329 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2330  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2331  bool PreferFalse) const {
2332  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2333  "Unknown select instruction");
2334  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2335  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2336  bool Invert = !DefMI;
2337  if (!DefMI)
2338  DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2339  if (!DefMI)
2340  return nullptr;
2341 
2342  // Find new register class to use.
2343  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2344  Register DestReg = MI.getOperand(0).getReg();
2345  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
2346  if (!MRI.constrainRegClass(DestReg, PreviousClass))
2347  return nullptr;
2348 
2349  // Create a new predicated version of DefMI.
2350  // Rfalse is the first use.
2351  MachineInstrBuilder NewMI =
2352  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2353 
2354  // Copy all the DefMI operands, excluding its (null) predicate.
2355  const MCInstrDesc &DefDesc = DefMI->getDesc();
2356  for (unsigned i = 1, e = DefDesc.getNumOperands();
2357  i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
2358  NewMI.add(DefMI->getOperand(i));
2359 
2360  unsigned CondCode = MI.getOperand(3).getImm();
2361  if (Invert)
2362  NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2363  else
2364  NewMI.addImm(CondCode);
2365  NewMI.add(MI.getOperand(4));
2366 
2367  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2368  if (NewMI->hasOptionalDef())
2369  NewMI.add(condCodeOp());
2370 
2371  // The output register value when the predicate is false is an implicit
2372  // register operand tied to the first def.
2373  // The tie makes the register allocator ensure the FalseReg is allocated the
2374  // same register as operand 0.
2375  FalseReg.setImplicit();
2376  NewMI.add(FalseReg);
2377  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2378 
2379  // Update SeenMIs set: register newly created MI and erase removed DefMI.
2380  SeenMIs.insert(NewMI);
2381  SeenMIs.erase(DefMI);
2382 
2383  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2384  // DefMI would be invalid when transferred inside the loop. Checking for a
2385  // loop is expensive, but at least remove kill flags if they are in different
2386  // BBs.
2387  if (DefMI->getParent() != MI.getParent())
2388  NewMI->clearKillInfo();
2389 
2390  // The caller will erase MI, but not DefMI.
2391  DefMI->eraseFromParent();
2392  return NewMI;
2393 }
2394 
2395 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2396 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2397 /// def operand.
2398 ///
2399 /// This will go away once we can teach tblgen how to set the optional CPSR def
2400 /// operand itself.
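///
/// For example, convertAddSubFlagsOpcode(ARM::ADDSri) below maps to
/// ARM::ADDri.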
2401 struct AddSubFlagsOpcodePair {
2402  uint16_t PseudoOpc;
2403  uint16_t MachineOpc;
2404 };
2405 
2406 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2407  {ARM::ADDSri, ARM::ADDri},
2408  {ARM::ADDSrr, ARM::ADDrr},
2409  {ARM::ADDSrsi, ARM::ADDrsi},
2410  {ARM::ADDSrsr, ARM::ADDrsr},
2411 
2412  {ARM::SUBSri, ARM::SUBri},
2413  {ARM::SUBSrr, ARM::SUBrr},
2414  {ARM::SUBSrsi, ARM::SUBrsi},
2415  {ARM::SUBSrsr, ARM::SUBrsr},
2416 
2417  {ARM::RSBSri, ARM::RSBri},
2418  {ARM::RSBSrsi, ARM::RSBrsi},
2419  {ARM::RSBSrsr, ARM::RSBrsr},
2420 
2421  {ARM::tADDSi3, ARM::tADDi3},
2422  {ARM::tADDSi8, ARM::tADDi8},
2423  {ARM::tADDSrr, ARM::tADDrr},
2424  {ARM::tADCS, ARM::tADC},
2425 
2426  {ARM::tSUBSi3, ARM::tSUBi3},
2427  {ARM::tSUBSi8, ARM::tSUBi8},
2428  {ARM::tSUBSrr, ARM::tSUBrr},
2429  {ARM::tSBCS, ARM::tSBC},
2430  {ARM::tRSBS, ARM::tRSB},
2431  {ARM::tLSLSri, ARM::tLSLri},
2432 
2433  {ARM::t2ADDSri, ARM::t2ADDri},
2434  {ARM::t2ADDSrr, ARM::t2ADDrr},
2435  {ARM::t2ADDSrs, ARM::t2ADDrs},
2436 
2437  {ARM::t2SUBSri, ARM::t2SUBri},
2438  {ARM::t2SUBSrr, ARM::t2SUBrr},
2439  {ARM::t2SUBSrs, ARM::t2SUBrs},
2440 
2441  {ARM::t2RSBSri, ARM::t2RSBri},
2442  {ARM::t2RSBSrs, ARM::t2RSBrs},
2443 };
2444 
2445 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2446  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
2447  if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
2448  return AddSubFlagsOpcodeMap[i].MachineOpc;
2449  return 0;
2450 }
2451 
2452 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2453  MachineBasicBlock::iterator &MBBI,
2454  const DebugLoc &dl, Register DestReg,
2455  Register BaseReg, int NumBytes,
2456  ARMCC::CondCodes Pred, Register PredReg,
2457  const ARMBaseInstrInfo &TII,
2458  unsigned MIFlags) {
2459  if (NumBytes == 0 && DestReg != BaseReg) {
2460  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2461  .addReg(BaseReg, RegState::Kill)
2462  .add(predOps(Pred, PredReg))
2463  .add(condCodeOp())
2464  .setMIFlags(MIFlags);
2465  return;
2466  }
2467 
2468  bool isSub = NumBytes < 0;
2469  if (isSub) NumBytes = -NumBytes;
2470 
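  // Illustrative example (not from the original source): NumBytes = 0x1004 is
  // not a single SO immediate, so this loop emits two instructions, one adding
  // 0x4 and one adding 0x1000.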
2471  while (NumBytes) {
2472  unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2473  unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2474  assert(ThisVal && "Didn't extract field correctly");
2475 
2476  // We will handle these bits from offset, clear them.
2477  NumBytes &= ~ThisVal;
2478 
2479  assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2480 
2481  // Build the new ADD / SUB.
2482  unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2483  BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2484  .addReg(BaseReg, RegState::Kill)
2485  .addImm(ThisVal)
2486  .add(predOps(Pred, PredReg))
2487  .add(condCodeOp())
2488  .setMIFlags(MIFlags);
2489  BaseReg = DestReg;
2490  }
2491 }
2492 
2493 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2494  MachineFunction &MF, MachineInstr *MI,
2495  unsigned NumBytes) {
2496  // This optimisation potentially adds lots of load and store
2497  // micro-operations; it is really only a benefit to code size.
2498  if (!Subtarget.hasMinSize())
2499  return false;
2500 
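  // Illustrative example (not from the original source): a prologue
  //   push {r4, lr} ; sub sp, #8
  // can instead push two extra (undef) registers,
  //   push {r2, r3, r4, lr}
  // and the matching pop can restore into dead registers, removing the
  // explicit SP adjustments when optimising for size.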
2501  // If only one register is pushed/popped, LLVM can use an LDR/STR
2502  // instead. We can't modify those so make sure we're dealing with an
2503  // instruction we understand.
2504  bool IsPop = isPopOpcode(MI->getOpcode());
2505  bool IsPush = isPushOpcode(MI->getOpcode());
2506  if (!IsPush && !IsPop)
2507  return false;
2508 
2509  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2510  MI->getOpcode() == ARM::VLDMDIA_UPD;
2511  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2512  MI->getOpcode() == ARM::tPOP ||
2513  MI->getOpcode() == ARM::tPOP_RET;
2514 
2515  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2516  MI->getOperand(1).getReg() == ARM::SP)) &&
2517  "trying to fold sp update into non-sp-updating push/pop");
2518 
2519  // The VFP push & pop act on D-registers, so we can only correctly fold in
2520  // an adjustment that is a multiple of 8 bytes. Similarly, each GPR is 4
2521  // bytes. Don't try if this is violated.
2522  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2523  return false;
2524 
2525  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2526  // pred) so the list starts at 4. Thumb1 starts after the predicate.
2527  int RegListIdx = IsT1PushPop ? 2 : 4;
2528 
2529  // Calculate the space we'll need in terms of registers.
2530  unsigned RegsNeeded;
2531  const TargetRegisterClass *RegClass;
2532  if (IsVFPPushPop) {
2533  RegsNeeded = NumBytes / 8;
2534  RegClass = &ARM::DPRRegClass;
2535  } else {
2536  RegsNeeded = NumBytes / 4;
2537  RegClass = &ARM::GPRRegClass;
2538  }
2539 
2540  // We're going to have to strip all list operands off before
2541  // re-adding them since the order matters, so save the existing ones
2542  // for later.
2543  SmallVector<MachineOperand, 4> RegList;
2544 
2545  // We're also going to need the first register transferred by this
2546  // instruction, which won't necessarily be the first register in the list.
2547  unsigned FirstRegEnc = -1;
2548 
2550  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2551  MachineOperand &MO = MI->getOperand(i);
2552  RegList.push_back(MO);
2553 
2554  if (MO.isReg() && !MO.isImplicit() &&
2555  TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2556  FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2557  }
2558 
2559  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2560 
2561  // Now try to find enough space in the reglist to allocate NumBytes.
2562  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2563  --CurRegEnc) {
2564  unsigned CurReg = RegClass->getRegister(CurRegEnc);
2565  if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2566  continue;
2567  if (!IsPop) {
2568  // Pushing any register is completely harmless; mark the register involved
2569  // as undef since we don't care about its value and must not restore it
2570  // during stack unwinding.
2571  RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2572  false, false, true));
2573  --RegsNeeded;
2574  continue;
2575  }
2576 
2577  // However, we can only pop an extra register if it's not live. For
2578  // registers live within the function we might clobber a return value
2579  // register; the other way a register can be live here is if it's
2580  // callee-saved.
2581  if (isCalleeSavedRegister(CurReg, CSRegs) ||
2582  MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2583  MachineBasicBlock::LQR_Dead) {
2584  // VFP pops don't allow holes in the register list, so any skip is fatal
2585  // for our transformation. GPR pops do, so we should just keep looking.
2586  if (IsVFPPushPop)
2587  return false;
2588  else
2589  continue;
2590  }
2591 
2592  // Mark the unimportant registers as <def,dead> in the POP.
2593  RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2594  true));
2595  --RegsNeeded;
2596  }
2597 
2598  if (RegsNeeded > 0)
2599  return false;
2600 
2601  // Finally we know we can profitably perform the optimisation so go
2602  // ahead: strip all existing registers off and add them back again
2603  // in the right order.
2604  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2605  MI->RemoveOperand(i);
2606 
2607  // Add the complete list back in.
2608  MachineInstrBuilder MIB(MF, &*MI);
2609  for (int i = RegList.size() - 1; i >= 0; --i)
2610  MIB.add(RegList[i]);
2611 
2612  return true;
2613 }
2614 
2615 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2616  Register FrameReg, int &Offset,
2617  const ARMBaseInstrInfo &TII) {
2618  unsigned Opcode = MI.getOpcode();
2619  const MCInstrDesc &Desc = MI.getDesc();
2620  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2621  bool isSub = false;
2622 
2623  // Memory operands in inline assembly always use AddrMode2.
2624  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2625  AddrMode = ARMII::AddrMode2;
2626 
2627  if (Opcode == ARM::ADDri) {
2628  Offset += MI.getOperand(FrameRegIdx+1).getImm();
2629  if (Offset == 0) {
2630  // Turn it into a move.
2631  MI.setDesc(TII.get(ARM::MOVr));
2632  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2633  MI.RemoveOperand(FrameRegIdx+1);
2634  Offset = 0;
2635  return true;
2636  } else if (Offset < 0) {
2637  Offset = -Offset;
2638  isSub = true;
2639  MI.setDesc(TII.get(ARM::SUBri));
2640  }
2641 
2642  // Common case: small offset, fits into instruction.
2643  if (ARM_AM::getSOImmVal(Offset) != -1) {
2644  // Replace the FrameIndex with sp / fp
2645  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2646  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2647  Offset = 0;
2648  return true;
2649  }
2650 
2651  // Otherwise, pull as much of the immediate into this ADDri/SUBri
2652  // as possible.
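  // Illustrative example (not from the original source): an offset of 0x1004
  // keeps 0x4 in this instruction and leaves Offset = 0x1000 for the caller
  // to materialize separately.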
2653  unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2654  unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2655 
2656  // We will handle these bits from offset, clear them.
2657  Offset &= ~ThisImmVal;
2658 
2659  // Get the properly encoded SOImmVal field.
2660  assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2661  "Bit extraction didn't work?");
2662  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2663  } else {
2664  unsigned ImmIdx = 0;
2665  int InstrOffs = 0;
2666  unsigned NumBits = 0;
2667  unsigned Scale = 1;
2668  switch (AddrMode) {
2669  case ARMII::AddrMode_i12:
2670  ImmIdx = FrameRegIdx + 1;
2671  InstrOffs = MI.getOperand(ImmIdx).getImm();
2672  NumBits = 12;
2673  break;
2674  case ARMII::AddrMode2:
2675  ImmIdx = FrameRegIdx+2;
2676  InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2677  if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2678  InstrOffs *= -1;
2679  NumBits = 12;
2680  break;
2681  case ARMII::AddrMode3:
2682  ImmIdx = FrameRegIdx+2;
2683  InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2684  if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2685  InstrOffs *= -1;
2686  NumBits = 8;
2687  break;
2688  case ARMII::AddrMode4:
2689  case ARMII::AddrMode6:
2690  // Can't fold any offset even if it's zero.
2691  return false;
2692  case ARMII::AddrMode5:
2693  ImmIdx = FrameRegIdx+1;
2694  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2695  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2696  InstrOffs *= -1;
2697  NumBits = 8;
2698  Scale = 4;
2699  break;
2700  case ARMII::AddrMode5FP16:
2701  ImmIdx = FrameRegIdx+1;
2702  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2703  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2704  InstrOffs *= -1;
2705  NumBits = 8;
2706  Scale = 2;
2707  break;
2708  case ARMII::AddrModeT2_i7:
2709  case ARMII::AddrModeT2_i7s2:
2710  case ARMII::AddrModeT2_i7s4:
2711  ImmIdx = FrameRegIdx+1;
2712  InstrOffs = MI.getOperand(ImmIdx).getImm();
2713  NumBits = 7;
2714  Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2715  AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2716  break;
2717  default:
2718  llvm_unreachable("Unsupported addressing mode!");
2719  }
2720 
2721  Offset += InstrOffs * Scale;
2722  assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2723  if (Offset < 0) {
2724  Offset = -Offset;
2725  isSub = true;
2726  }
2727 
2728  // Attempt to fold the address computation if the opcode has offset bits
2729  if (NumBits > 0) {
2730  // Common case: small offset, fits into instruction.
2731  MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2732  int ImmedOffset = Offset / Scale;
2733  unsigned Mask = (1 << NumBits) - 1;
2734  if ((unsigned)Offset <= Mask * Scale) {
2735  // Replace the FrameIndex with sp
2736  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2737  // FIXME: When addrmode2 goes away, this will simplify (like the
2738  // T2 version), as the LDR.i12 versions don't need the encoding
2739  // tricks for the offset value.
2740  if (isSub) {
2741  if (AddrMode == ARMII::AddrMode_i12)
2742  ImmedOffset = -ImmedOffset;
2743  else
2744  ImmedOffset |= 1 << NumBits;
2745  }
2746  ImmOp.ChangeToImmediate(ImmedOffset);
2747  Offset = 0;
2748  return true;
2749  }
2750 
2751  // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2752  ImmedOffset = ImmedOffset & Mask;
2753  if (isSub) {
2754  if (AddrMode == ARMII::AddrMode_i12)
2755  ImmedOffset = -ImmedOffset;
2756  else
2757  ImmedOffset |= 1 << NumBits;
2758  }
2759  ImmOp.ChangeToImmediate(ImmedOffset);
2760  Offset &= ~(Mask*Scale);
2761  }
2762  }
2763 
2764  Offset = (isSub) ? -Offset : Offset;
2765  return Offset == 0;
2766 }
2767 
2768 /// analyzeCompare - For a comparison instruction, return the source registers
2769 /// in SrcReg and SrcReg2 if having two register operands, and the value it
2770 /// compares against in CmpValue. Return true if the comparison instruction
2771 /// can be analyzed.
2772 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2773  Register &SrcReg2, int &CmpMask,
2774  int &CmpValue) const {
2775  switch (MI.getOpcode()) {
2776  default: break;
2777  case ARM::CMPri:
2778  case ARM::t2CMPri:
2779  case ARM::tCMPi8:
2780  SrcReg = MI.getOperand(0).getReg();
2781  SrcReg2 = 0;
2782  CmpMask = ~0;
2783  CmpValue = MI.getOperand(1).getImm();
2784  return true;
2785  case ARM::CMPrr:
2786  case ARM::t2CMPrr:
2787  case ARM::tCMPr:
2788  SrcReg = MI.getOperand(0).getReg();
2789  SrcReg2 = MI.getOperand(1).getReg();
2790  CmpMask = ~0;
2791  CmpValue = 0;
2792  return true;
2793  case ARM::TSTri:
2794  case ARM::t2TSTri:
2795  SrcReg = MI.getOperand(0).getReg();
2796  SrcReg2 = 0;
2797  CmpMask = MI.getOperand(1).getImm();
2798  CmpValue = 0;
2799  return true;
2800  }
2801 
2802  return false;
2803 }
2804 
2805 /// isSuitableForMask - Identify a suitable 'and' instruction that
2806 /// operates on the given source register and applies the same mask
2807 /// as a 'tst' instruction. Provide a limited look-through for copies.
2808 /// When successful, MI will hold the found instruction.
2809 static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2810  int CmpMask, bool CommonUse) {
2811  switch (MI->getOpcode()) {
2812  case ARM::ANDri:
2813  case ARM::t2ANDri:
2814  if (CmpMask != MI->getOperand(2).getImm())
2815  return false;
2816  if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2817  return true;
2818  break;
2819  }
2820 
2821  return false;
2822 }
2823 
2824 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2825 /// the condition code if we modify the instructions such that flags are
2826 /// set by ADD(a,b,X).
2827 static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2828  switch (CC) {
2829  default: return ARMCC::AL;
2830  case ARMCC::HS: return ARMCC::LO;
2831  case ARMCC::LO: return ARMCC::HS;
2832  case ARMCC::VS: return ARMCC::VS;
2833  case ARMCC::VC: return ARMCC::VC;
2834  }
2835 }
2836 
2837 /// isRedundantFlagInstr - check whether the first instruction, whose only
2838 /// purpose is to update flags, can be made redundant.
2839 /// CMPrr can be made redundant by SUBrr if the operands are the same.
2840 /// CMPri can be made redundant by SUBri if the operands are the same.
2841 /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2842 /// This function can be extended later on.
2843 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2844  Register SrcReg, Register SrcReg2,
2845  int ImmValue, const MachineInstr *OI,
2846  bool &IsThumb1) {
2847  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2848  (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2849  ((OI->getOperand(1).getReg() == SrcReg &&
2850  OI->getOperand(2).getReg() == SrcReg2) ||
2851  (OI->getOperand(1).getReg() == SrcReg2 &&
2852  OI->getOperand(2).getReg() == SrcReg))) {
2853  IsThumb1 = false;
2854  return true;
2855  }
2856 
2857  if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2858  ((OI->getOperand(2).getReg() == SrcReg &&
2859  OI->getOperand(3).getReg() == SrcReg2) ||
2860  (OI->getOperand(2).getReg() == SrcReg2 &&
2861  OI->getOperand(3).getReg() == SrcReg))) {
2862  IsThumb1 = true;
2863  return true;
2864  }
2865 
2866  if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2867  (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2868  OI->getOperand(1).getReg() == SrcReg &&
2869  OI->getOperand(2).getImm() == ImmValue) {
2870  IsThumb1 = false;
2871  return true;
2872  }
2873 
2874  if (CmpI->getOpcode() == ARM::tCMPi8 &&
2875  (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2876  OI->getOperand(2).getReg() == SrcReg &&
2877  OI->getOperand(3).getImm() == ImmValue) {
2878  IsThumb1 = true;
2879  return true;
2880  }
2881 
2882  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2883  (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2884  OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2885  OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2886  OI->getOperand(0).getReg() == SrcReg &&
2887  OI->getOperand(1).getReg() == SrcReg2) {
2888  IsThumb1 = false;
2889  return true;
2890  }
2891 
2892  if (CmpI->getOpcode() == ARM::tCMPr &&
2893  (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2894  OI->getOpcode() == ARM::tADDrr) &&
2895  OI->getOperand(0).getReg() == SrcReg &&
2896  OI->getOperand(2).getReg() == SrcReg2) {
2897  IsThumb1 = true;
2898  return true;
2899  }
2900 
2901  return false;
2902 }
2903 
2904 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2905  switch (MI->getOpcode()) {
2906  default: return false;
2907  case ARM::tLSLri:
2908  case ARM::tLSRri:
2909  case ARM::tLSLrr:
2910  case ARM::tLSRrr:
2911  case ARM::tSUBrr:
2912  case ARM::tADDrr:
2913  case ARM::tADDi3:
2914  case ARM::tADDi8:
2915  case ARM::tSUBi3:
2916  case ARM::tSUBi8:
2917  case ARM::tMUL:
2918  case ARM::tADC:
2919  case ARM::tSBC:
2920  case ARM::tRSB:
2921  case ARM::tAND:
2922  case ARM::tORR:
2923  case ARM::tEOR:
2924  case ARM::tBIC:
2925  case ARM::tMVN:
2926  case ARM::tASRri:
2927  case ARM::tASRrr:
2928  case ARM::tROR:
2929  IsThumb1 = true;
2930  LLVM_FALLTHROUGH;
2931  case ARM::RSBrr:
2932  case ARM::RSBri:
2933  case ARM::RSCrr:
2934  case ARM::RSCri:
2935  case ARM::ADDrr:
2936  case ARM::ADDri:
2937  case ARM::ADCrr:
2938  case ARM::ADCri:
2939  case ARM::SUBrr:
2940  case ARM::SUBri:
2941  case ARM::SBCrr:
2942  case ARM::SBCri:
2943  case ARM::t2RSBri:
2944  case ARM::t2ADDrr:
2945  case ARM::t2ADDri:
2946  case ARM::t2ADCrr:
2947  case ARM::t2ADCri:
2948  case ARM::t2SUBrr:
2949  case ARM::t2SUBri:
2950  case ARM::t2SBCrr:
2951  case ARM::t2SBCri:
2952  case ARM::ANDrr:
2953  case ARM::ANDri:
2954  case ARM::t2ANDrr:
2955  case ARM::t2ANDri:
2956  case ARM::ORRrr:
2957  case ARM::ORRri:
2958  case ARM::t2ORRrr:
2959  case ARM::t2ORRri:
2960  case ARM::EORrr:
2961  case ARM::EORri:
2962  case ARM::t2EORrr:
2963  case ARM::t2EORri:
2964  case ARM::t2LSRri:
2965  case ARM::t2LSRrr:
2966  case ARM::t2LSLri:
2967  case ARM::t2LSLrr:
2968  return true;
2969  }
2970 }
2971 
2972 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
2973 /// comparison into one that sets the zero bit in the flags register;
2974 /// Remove a redundant Compare instruction if an earlier instruction can set the
2975 /// flags in the same way as Compare.
2976 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2977 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2978 /// condition code of instructions which use the flags.
2979 bool ARMBaseInstrInfo::optimizeCompareInstr(
2980  MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
2981  int CmpValue, const MachineRegisterInfo *MRI) const {
2982  // Get the unique definition of SrcReg.
2983  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2984  if (!MI) return false;
2985 
2986  // Masked compares sometimes use the same register as the corresponding 'and'.
2987  if (CmpMask != ~0) {
2988  if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2989  MI = nullptr;
2990  for (MachineRegisterInfo::use_instr_iterator
2991  UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2992  UI != UE; ++UI) {
2993  if (UI->getParent() != CmpInstr.getParent())
2994  continue;
2995  MachineInstr *PotentialAND = &*UI;
2996  if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2997  isPredicated(*PotentialAND))
2998  continue;
2999  MI = PotentialAND;
3000  break;
3001  }
3002  if (!MI) return false;
3003  }
3004  }
3005 
3006  // Get ready to iterate backward from CmpInstr.
3007  MachineBasicBlock::iterator I = CmpInstr, E = MI,
3008  B = CmpInstr.getParent()->begin();
3009 
3010  // Early exit if CmpInstr is at the beginning of the BB.
3011  if (I == B) return false;
3012 
3013  // There are two possible candidates which can be changed to set CPSR:
3014  // One is MI, the other is a SUB or ADD instruction.
3015  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
3016  // ADDr[ri](r1, r2, X).
3017  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
3018  MachineInstr *SubAdd = nullptr;
3019  if (SrcReg2 != 0)
3020  // MI is not a candidate for CMPrr.
3021  MI = nullptr;
3022  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
3023  // Conservatively refuse to convert an instruction which isn't in the same
3024  // BB as the comparison.
3025  // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
3026  // Thus we cannot return here.
3027  if (CmpInstr.getOpcode() == ARM::CMPri ||
3028  CmpInstr.getOpcode() == ARM::t2CMPri ||
3029  CmpInstr.getOpcode() == ARM::tCMPi8)
3030  MI = nullptr;
3031  else
3032  return false;
3033  }
3034 
3035  bool IsThumb1 = false;
3036  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
3037  return false;
3038 
3039  // We also want to do this peephole for cases like this: if (a*b == 0),
3040  // and optimise away the CMP instruction from the generated code sequence:
3041  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
3042  // resulting from the select instruction, but these MOVS instructions for
3043  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
3044  // However, if we only have MOVS instructions in between the CMP and the
3045  // other instruction (the MULS in this example), then the CPSR is dead so we
3046  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
3047  // reordering and then continue the analysis hoping we can eliminate the
3048  // CMP. This peephole works on the vregs, so is still in SSA form. As a
3049  // consequence, the movs won't redefine/kill the MUL operands which would
3050  // make this reordering illegal.
3051  const TargetRegisterInfo *TRI = &getRegisterInfo();
3052  if (MI && IsThumb1) {
3053  --I;
3054  if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
3055  bool CanReorder = true;
3056  for (; I != E; --I) {
3057  if (I->getOpcode() != ARM::tMOVi8) {
3058  CanReorder = false;
3059  break;
3060  }
3061  }
3062  if (CanReorder) {
3063  MI = MI->removeFromParent();
3064  E = CmpInstr;
3065  CmpInstr.getParent()->insert(E, MI);
3066  }
3067  }
3068  I = CmpInstr;
3069  E = MI;
3070  }
3071 
3072  // Check that CPSR isn't set between the comparison instruction and the one we
3073  // want to change. At the same time, search for SubAdd.
3074  bool SubAddIsThumb1 = false;
3075  do {
3076  const MachineInstr &Instr = *--I;
3077 
3078  // Check whether CmpInstr can be made redundant by the current instruction.
3079  if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
3080  SubAddIsThumb1)) {
3081  SubAdd = &*I;
3082  break;
3083  }
3084 
3085  // Allow E (which was initially MI) to be SubAdd but do not search before E.
3086  if (I == E)
3087  break;
3088 
3089  if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
3090  Instr.readsRegister(ARM::CPSR, TRI))
3091  // This instruction modifies or uses CPSR after the one we want to
3092  // change. We can't do this transformation.
3093  return false;
3094 
3095  if (I == B) {
3096  // In some cases, we scan the use-list of an instruction for an AND;
3097  // that AND is in the same BB, but may not be scheduled before the
3098  // corresponding TST. In that case, bail out.
3099  //
3100  // FIXME: We could try to reschedule the AND.
3101  return false;
3102  }
3103  } while (true);
3104 
3105  // Return false if no candidates exist.
3106  if (!MI && !SubAdd)
3107  return false;
3108 
3109  // If we found a SubAdd, use it as it will be closer to the CMP
3110  if (SubAdd) {
3111  MI = SubAdd;
3112  IsThumb1 = SubAddIsThumb1;
3113  }
3114 
3115  // We can't use a predicated instruction - it doesn't always write the flags.
3116  if (isPredicated(*MI))
3117  return false;
3118 
3119  // Scan forward for the use of CPSR
3120  // When checking against MI: if it's a conditional code that requires
3121  // checking of the V bit or C bit, then this is not safe to do.
3122  // It is safe to remove CmpInstr if CPSR is redefined or killed.
3123  // If we are done with the basic block, we need to check whether CPSR is
3124  // live-out.
3125  SmallVector<std::pair<MachineOperand *, ARMCC::CondCodes>, 4>
3126  OperandsToUpdate;
3127  bool isSafe = false;
3128  I = CmpInstr;
3129  E = CmpInstr.getParent()->end();
3130  while (!isSafe && ++I != E) {
3131  const MachineInstr &Instr = *I;
3132  for (unsigned IO = 0, EO = Instr.getNumOperands();
3133  !isSafe && IO != EO; ++IO) {
3134  const MachineOperand &MO = Instr.getOperand(IO);
3135  if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3136  isSafe = true;
3137  break;
3138  }
3139  if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3140  continue;
3141  if (MO.isDef()) {
3142  isSafe = true;
3143  break;
3144  }
3145  // Condition code is after the operand before CPSR except for VSELs.
3146  ARMCC::CondCodes CC;
3147  bool IsInstrVSel = true;
3148  switch (Instr.getOpcode()) {
3149  default:
3150  IsInstrVSel = false;
3151  CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3152  break;
3153  case ARM::VSELEQD:
3154  case ARM::VSELEQS:
3155  case ARM::VSELEQH:
3156  CC = ARMCC::EQ;
3157  break;
3158  case ARM::VSELGTD:
3159  case ARM::VSELGTS:
3160  case ARM::VSELGTH:
3161  CC = ARMCC::GT;
3162  break;
3163  case ARM::VSELGED:
3164  case ARM::VSELGES:
3165  case ARM::VSELGEH:
3166  CC = ARMCC::GE;
3167  break;
3168  case ARM::VSELVSD:
3169  case ARM::VSELVSS:
3170  case ARM::VSELVSH:
3171  CC = ARMCC::VS;
3172  break;
3173  }
3174 
3175  if (SubAdd) {
3176  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3177  // on CMP needs to be updated to be based on SUB.
3178  // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3179  // needs to be modified.
3180  // Push the condition code operands to OperandsToUpdate.
3181  // If it is safe to remove CmpInstr, the condition code of these
3182  // operands will be modified.
3183  unsigned Opc = SubAdd->getOpcode();
3184  bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3185  Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3186  Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3187  Opc == ARM::tSUBi8;
3188  unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3189  if (!IsSub ||
3190  (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3191  SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3192  // VSel doesn't support condition code update.
3193  if (IsInstrVSel)
3194  return false;
3195  // Ensure we can swap the condition.
3196  ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3197  if (NewCC == ARMCC::AL)
3198  return false;
3199  OperandsToUpdate.push_back(
3200  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3201  }
3202  } else {
3203  // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3204  switch (CC) {
3205  case ARMCC::EQ: // Z
3206  case ARMCC::NE: // Z
3207  case ARMCC::MI: // N
3208  case ARMCC::PL: // N
3209  case ARMCC::AL: // none
3210  // CPSR can be used multiple times, we should continue.
3211  break;
3212  case ARMCC::HS: // C
3213  case ARMCC::LO: // C
3214  case ARMCC::VS: // V
3215  case ARMCC::VC: // V
3216  case ARMCC::HI: // C Z
3217  case ARMCC::LS: // C Z
3218  case ARMCC::GE: // N V
3219  case ARMCC::LT: // N V
3220  case ARMCC::GT: // Z N V
3221  case ARMCC::LE: // Z N V
3222  // The instruction uses the V bit or C bit which is not safe.
3223  return false;
3224  }
3225  }
3226  }
3227  }
3228 
3229  // If CPSR is not killed nor re-defined, we should check whether it is
3230  // live-out. If it is live-out, do not optimize.
3231  if (!isSafe) {
3232  MachineBasicBlock *MBB = CmpInstr.getParent();
3233  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
3234  SE = MBB->succ_end(); SI != SE; ++SI)
3235  if ((*SI)->isLiveIn(ARM::CPSR))
3236  return false;
3237  }
3238 
3239  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3240  // set CPSR so this is represented as an explicit output)
3241  if (!IsThumb1) {
3242  MI->getOperand(5).setReg(ARM::CPSR);
3243  MI->getOperand(5).setIsDef(true);
3244  }
3245  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3246  CmpInstr.eraseFromParent();
3247 
3248  // Modify the condition code of operands in OperandsToUpdate.
3249  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3250  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3251  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
3252  OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3253 
3254  MI->clearRegisterDeads(ARM::CPSR);
3255 
3256  return true;
3257 }
3258 
3259 bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3260  // Do not sink MI if it might be used to optimize a redundant compare.
3261  // We heuristically only look at the instruction immediately following MI to
3262  // avoid potentially searching the entire basic block.
3263  if (isPredicated(MI))
3264  return true;
3265  MachineBasicBlock::const_iterator Next = &MI;
3266  ++Next;
3267  Register SrcReg, SrcReg2;
3268  int CmpMask, CmpValue;
3269  bool IsThumb1;
3270  if (Next != MI.getParent()->end() &&
3271  analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3272  isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3273  return false;
3274  return true;
3275 }
3276 
3277 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3278  Register Reg,
3279  MachineRegisterInfo *MRI) const {
3280  // Fold large immediates into add, sub, or, xor.
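  // Illustrative example (not from the original source): if %c = MOVi32imm
  // 0x1001 is only used by %d = ADDrr %a, %c, the pair can be rewritten as two
  // ADDri instructions adding 0x1 and 0x1000, and the MOVi32imm deleted.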
3281  unsigned DefOpc = DefMI.getOpcode();
3282  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
3283  return false;
3284  if (!DefMI.getOperand(1).isImm())
3285  // Could be t2MOVi32imm @xx
3286  return false;
3287 
3288  if (!MRI->hasOneNonDBGUse(Reg))
3289  return false;
3290 
3291  const MCInstrDesc &DefMCID = DefMI.getDesc();
3292  if (DefMCID.hasOptionalDef()) {
3293  unsigned NumOps = DefMCID.getNumOperands();
3294  const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3295  if (MO.getReg() == ARM::CPSR && !MO.isDead())
3296  // If DefMI defines CPSR and it is not dead, it's obviously not safe
3297  // to delete DefMI.
3298  return false;
3299  }
3300 
3301  const MCInstrDesc &UseMCID = UseMI.getDesc();
3302  if (UseMCID.hasOptionalDef()) {
3303  unsigned NumOps = UseMCID.getNumOperands();
3304  if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3305  // If the instruction sets the flag, do not attempt this optimization
3306  // since it may change the semantics of the code.
3307  return false;
3308  }
3309 
3310  unsigned UseOpc = UseMI.getOpcode();
3311  unsigned NewUseOpc = 0;
3312  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3313  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3314  bool Commute = false;
3315  switch (UseOpc) {
3316  default: return false;
3317  case ARM::SUBrr:
3318  case ARM::ADDrr:
3319  case ARM::ORRrr:
3320  case ARM::EORrr:
3321  case ARM::t2SUBrr:
3322  case ARM::t2ADDrr:
3323  case ARM::t2ORRrr:
3324  case ARM::t2EORrr: {
3325  Commute = UseMI.getOperand(2).getReg() != Reg;
3326  switch (UseOpc) {
3327  default: break;
3328  case ARM::ADDrr:
3329  case ARM::SUBrr:
3330  if (UseOpc == ARM::SUBrr && Commute)
3331  return false;
3332 
3333  // ADD/SUB are special because they're essentially the same operation, so
3334  // we can handle a larger range of immediates.
3335  if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3336  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3337  else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3338  ImmVal = -ImmVal;
3339  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3340  } else
3341  return false;
3342  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3343  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3344  break;
3345  case ARM::ORRrr:
3346  case ARM::EORrr:
3347  if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3348  return false;
3349  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3350  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3351  switch (UseOpc) {
3352  default: break;
3353  case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3354  case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3355  }
3356  break;
3357  case ARM::t2ADDrr:
3358  case ARM::t2SUBrr: {
3359  if (UseOpc == ARM::t2SUBrr && Commute)
3360  return false;
3361 
3362  // ADD/SUB are special because they're essentially the same operation, so
3363  // we can handle a larger range of immediates.
3364  const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3365  const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3366  const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3367  if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3368  NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3369  else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3370  ImmVal = -ImmVal;
3371  NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3372  } else
3373  return false;
3374  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3375  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3376  break;
3377  }
3378  case ARM::t2ORRrr:
3379  case ARM::t2EORrr:
3380  if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3381  return false;
3382  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3383  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3384  switch (UseOpc) {
3385  default: break;
3386  case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3387  case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3388  }
3389  break;
3390  }
3391  }
3392  }
3393 
3394  unsigned OpIdx = Commute ? 2 : 1;
3395  Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3396  bool isKill = UseMI.getOperand(OpIdx).isKill();
3397  const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3398  Register NewReg = MRI->createVirtualRegister(TRC);
3399  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3400  NewReg)
3401  .addReg(Reg1, getKillRegState(isKill))
3402  .addImm(SOImmValV1)
3403  .add(predOps(ARMCC::AL))
3404  .add(condCodeOp());
3405  UseMI.setDesc(get(NewUseOpc));
3406  UseMI.getOperand(1).setReg(NewReg);
3407  UseMI.getOperand(1).setIsKill();
3408  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3409  DefMI.eraseFromParent();
 3410  // FIXME: t2ADDrr should be split, as different rules apply when writing to SP,
 3411  // just as t2ADDri was split into [t2ADDri, t2ADDspImm].
3412  // Then the below code will not be needed, as the input/output register
3413  // classes will be rgpr or gprSP.
3414  // For now, we fix the UseMI operand explicitly here:
3415  switch(NewUseOpc){
3416  case ARM::t2ADDspImm:
3417  case ARM::t2SUBspImm:
3418  case ARM::t2ADDri:
3419  case ARM::t2SUBri:
3420  MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3421  }
3422  return true;
3423 }
3424 
3425 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3426  const MachineInstr &MI) {
3427  switch (MI.getOpcode()) {
3428  default: {
3429  const MCInstrDesc &Desc = MI.getDesc();
3430  int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3431  assert(UOps >= 0 && "bad # UOps");
3432  return UOps;
3433  }
3434 
3435  case ARM::LDRrs:
3436  case ARM::LDRBrs:
3437  case ARM::STRrs:
3438  case ARM::STRBrs: {
3439  unsigned ShOpVal = MI.getOperand(3).getImm();
3440  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3441  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3442  if (!isSub &&
3443  (ShImm == 0 ||
3444  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3445  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3446  return 1;
3447  return 2;
3448  }
3449 
3450  case ARM::LDRH:
3451  case ARM::STRH: {
3452  if (!MI.getOperand(2).getReg())
3453  return 1;
3454 
3455  unsigned ShOpVal = MI.getOperand(3).getImm();
3456  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3457  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3458  if (!isSub &&
3459  (ShImm == 0 ||
3460  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3461  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3462  return 1;
3463  return 2;
3464  }
3465 
3466  case ARM::LDRSB:
3467  case ARM::LDRSH:
3468  return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3469 
3470  case ARM::LDRSB_POST:
3471  case ARM::LDRSH_POST: {
3472  Register Rt = MI.getOperand(0).getReg();
3473  Register Rm = MI.getOperand(3).getReg();
3474  return (Rt == Rm) ? 4 : 3;
3475  }
3476 
3477  case ARM::LDR_PRE_REG:
3478  case ARM::LDRB_PRE_REG: {
3479  Register Rt = MI.getOperand(0).getReg();
3480  Register Rm = MI.getOperand(3).getReg();
3481  if (Rt == Rm)
3482  return 3;
3483  unsigned ShOpVal = MI.getOperand(4).getImm();
3484  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3485  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3486  if (!isSub &&
3487  (ShImm == 0 ||
3488  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3489  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3490  return 2;
3491  return 3;
3492  }
3493 
3494  case ARM::STR_PRE_REG:
3495  case ARM::STRB_PRE_REG: {
3496  unsigned ShOpVal = MI.getOperand(4).getImm();
3497  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3498  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3499  if (!isSub &&
3500  (ShImm == 0 ||
3501  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3502  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3503  return 2;
3504  return 3;
3505  }
3506 
3507  case ARM::LDRH_PRE:
3508  case ARM::STRH_PRE: {
3509  Register Rt = MI.getOperand(0).getReg();
3510  Register Rm = MI.getOperand(3).getReg();
3511  if (!Rm)
3512  return 2;
3513  if (Rt == Rm)
3514  return 3;
3515  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3516  }
3517 
3518  case ARM::LDR_POST_REG:
3519  case ARM::LDRB_POST_REG:
3520  case ARM::LDRH_POST: {
3521  Register Rt = MI.getOperand(0).getReg();
3522  Register Rm = MI.getOperand(3).getReg();
3523  return (Rt == Rm) ? 3 : 2;
3524  }
3525 
3526  case ARM::LDR_PRE_IMM:
3527  case ARM::LDRB_PRE_IMM:
3528  case ARM::LDR_POST_IMM:
3529  case ARM::LDRB_POST_IMM:
3530  case ARM::STRB_POST_IMM:
3531  case ARM::STRB_POST_REG:
3532  case ARM::STRB_PRE_IMM:
3533  case ARM::STRH_POST:
3534  case ARM::STR_POST_IMM:
3535  case ARM::STR_POST_REG:
3536  case ARM::STR_PRE_IMM:
3537  return 2;
3538 
3539  case ARM::LDRSB_PRE:
3540  case ARM::LDRSH_PRE: {
3541  Register Rm = MI.getOperand(3).getReg();
3542  if (Rm == 0)
3543  return 3;
3544  Register Rt = MI.getOperand(0).getReg();
3545  if (Rt == Rm)
3546  return 4;
3547  unsigned ShOpVal = MI.getOperand(4).getImm();
3548  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3549  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3550  if (!isSub &&
3551  (ShImm == 0 ||
3552  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3553  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3554  return 3;
3555  return 4;
3556  }
3557 
3558  case ARM::LDRD: {
3559  Register Rt = MI.getOperand(0).getReg();
3560  Register Rn = MI.getOperand(2).getReg();
3561  Register Rm = MI.getOperand(3).getReg();
3562  if (Rm)
3563  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3564  : 3;
3565  return (Rt == Rn) ? 3 : 2;
3566  }
3567 
3568  case ARM::STRD: {
3569  Register Rm = MI.getOperand(3).getReg();
3570  if (Rm)
3571  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3572  : 3;
3573  return 2;
3574  }
3575 
3576  case ARM::LDRD_POST:
3577  case ARM::t2LDRD_POST:
3578  return 3;
3579 
3580  case ARM::STRD_POST:
3581  case ARM::t2STRD_POST:
3582  return 4;
3583 
3584  case ARM::LDRD_PRE: {
3585  Register Rt = MI.getOperand(0).getReg();
3586  Register Rn = MI.getOperand(3).getReg();
3587  Register Rm = MI.getOperand(4).getReg();
3588  if (Rm)
3589  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3590  : 4;
3591  return (Rt == Rn) ? 4 : 3;
3592  }
3593 
3594  case ARM::t2LDRD_PRE: {
3595  Register Rt = MI.getOperand(0).getReg();
3596  Register Rn = MI.getOperand(3).getReg();
3597  return (Rt == Rn) ? 4 : 3;
3598  }
3599 
3600  case ARM::STRD_PRE: {
3601  Register Rm = MI.getOperand(4).getReg();
3602  if (Rm)
3603  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3604  : 4;
3605  return 3;
3606  }
3607 
3608  case ARM::t2STRD_PRE:
3609  return 3;
3610 
3611  case ARM::t2LDR_POST:
3612  case ARM::t2LDRB_POST:
3613  case ARM::t2LDRB_PRE:
3614  case ARM::t2LDRSBi12:
3615  case ARM::t2LDRSBi8:
3616  case ARM::t2LDRSBpci:
3617  case ARM::t2LDRSBs:
3618  case ARM::t2LDRH_POST:
3619  case ARM::t2LDRH_PRE:
3620  case ARM::t2LDRSBT:
3621  case ARM::t2LDRSB_POST:
3622  case ARM::t2LDRSB_PRE:
3623  case ARM::t2LDRSH_POST:
3624  case ARM::t2LDRSH_PRE:
3625  case ARM::t2LDRSHi12:
3626  case ARM::t2LDRSHi8:
3627  case ARM::t2LDRSHpci:
3628  case ARM::t2LDRSHs:
3629  return 2;
3630 
3631  case ARM::t2LDRDi8: {
3632  Register Rt = MI.getOperand(0).getReg();
3633  Register Rn = MI.getOperand(2).getReg();
3634  return (Rt == Rn) ? 3 : 2;
3635  }
3636 
3637  case ARM::t2STRB_POST:
3638  case ARM::t2STRB_PRE:
3639  case ARM::t2STRBs:
3640  case ARM::t2STRDi8:
3641  case ARM::t2STRH_POST:
3642  case ARM::t2STRH_PRE:
3643  case ARM::t2STRHs:
3644  case ARM::t2STR_POST:
3645  case ARM::t2STR_PRE:
3646  case ARM::t2STRs:
3647  return 2;
3648  }
3649 }
3650 
3651 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
 3652 // can't be easily determined, return 0 (missing MachineMemOperand).
3653 //
3654 // FIXME: The current MachineInstr design does not support relying on machine
3655 // mem operands to determine the width of a memory access. Instead, we expect
3656 // the target to provide this information based on the instruction opcode and
3657 // operands. However, using MachineMemOperand is the best solution now for
3658 // two reasons:
3659 //
3660 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3661 // operands. This is much more dangerous than using the MachineMemOperand
3662 // sizes because CodeGen passes can insert/remove optional machine operands. In
3663 // fact, it's totally incorrect for preRA passes and appears to be wrong for
3664 // postRA passes as well.
3665 //
3666 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3667 // machine model that calls this should handle the unknown (zero size) case.
3668 //
3669 // Long term, we should require a target hook that verifies MachineMemOperand
3670 // sizes during MC lowering. That target hook should be local to MC lowering
3671 // because we can't ensure that it is aware of other MI forms. Doing this will
3672 // ensure that MachineMemOperands are correctly propagated through all passes.
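 // E.g. (illustrative): an LDMIA carrying a single 16-byte MachineMemOperand
 // reports 4 addresses below; an LDM with no memoperands reports 0, and the
 // result is clamped to 16 for the scheduler.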
 3673 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
 3674  unsigned Size = 0;
3675  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3676  E = MI.memoperands_end();
3677  I != E; ++I) {
3678  Size += (*I)->getSize();
3679  }
3680  // FIXME: The scheduler currently can't handle values larger than 16. But
3681  // the values can actually go up to 32 for floating-point load/store
3682  // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3683  // operations isn't right; we could end up with "extra" memory operands for
3684  // various reasons, like tail merge merging two memory operations.
3685  return std::min(Size / 4, 16U);
3686 }
3687 
3688 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3689  unsigned NumRegs) {
3690  unsigned UOps = 1 + NumRegs; // 1 for address computation.
3691  switch (Opc) {
3692  default:
3693  break;
3694  case ARM::VLDMDIA_UPD:
3695  case ARM::VLDMDDB_UPD:
3696  case ARM::VLDMSIA_UPD:
3697  case ARM::VLDMSDB_UPD:
3698  case ARM::VSTMDIA_UPD:
3699  case ARM::VSTMDDB_UPD:
3700  case ARM::VSTMSIA_UPD:
3701  case ARM::VSTMSDB_UPD:
3702  case ARM::LDMIA_UPD:
3703  case ARM::LDMDA_UPD:
3704  case ARM::LDMDB_UPD:
3705  case ARM::LDMIB_UPD:
3706  case ARM::STMIA_UPD:
3707  case ARM::STMDA_UPD:
3708  case ARM::STMDB_UPD:
3709  case ARM::STMIB_UPD:
3710  case ARM::tLDMIA_UPD:
3711  case ARM::tSTMIA_UPD:
3712  case ARM::t2LDMIA_UPD:
3713  case ARM::t2LDMDB_UPD:
3714  case ARM::t2STMIA_UPD:
3715  case ARM::t2STMDB_UPD:
3716  ++UOps; // One for base register writeback.
3717  break;
3718  case ARM::LDMIA_RET:
3719  case ARM::tPOP_RET:
3720  case ARM::t2LDMIA_RET:
3721  UOps += 2; // One for base reg wb, one for write to pc.
3722  break;
3723  }
3724  return UOps;
3725 }
3726 
 3727 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
 3728  const MachineInstr &MI) const {
3729  if (!ItinData || ItinData->isEmpty())
3730  return 1;
3731 
3732  const MCInstrDesc &Desc = MI.getDesc();
3733  unsigned Class = Desc.getSchedClass();
3734  int ItinUOps = ItinData->getNumMicroOps(Class);
3735  if (ItinUOps >= 0) {
3736  if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3737  return getNumMicroOpsSwiftLdSt(ItinData, MI);
3738 
3739  return ItinUOps;
3740  }
3741 
3742  unsigned Opc = MI.getOpcode();
3743  switch (Opc) {
3744  default:
3745  llvm_unreachable("Unexpected multi-uops instruction!");
3746  case ARM::VLDMQIA:
3747  case ARM::VSTMQIA:
3748  return 2;
3749 
 3750  // The number of uOps for a load / store multiple is determined by the number
 3751  // of registers.
3752  //
3753  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3754  // same cycle. The scheduling for the first load / store must be done
3755  // separately by assuming the address is not 64-bit aligned.
3756  //
3757  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3758  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3759  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
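 // E.g. (illustrative): a load / store multiple of 4 registers on a
 // double-issue core that checks alignment takes 4/2 = 2 uops when the
 // address is 64-bit aligned, and 3 uops when it is not (one extra AGU cycle).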
3760  case ARM::VLDMDIA:
3761  case ARM::VLDMDIA_UPD:
3762  case ARM::VLDMDDB_UPD:
3763  case ARM::VLDMSIA:
3764  case ARM::VLDMSIA_UPD:
3765  case ARM::VLDMSDB_UPD:
3766  case ARM::VSTMDIA:
3767  case ARM::VSTMDIA_UPD:
3768  case ARM::VSTMDDB_UPD:
3769  case ARM::VSTMSIA:
3770  case ARM::VSTMSIA_UPD:
3771  case ARM::VSTMSDB_UPD: {
3772  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3773  return (NumRegs / 2) + (NumRegs % 2) + 1;
3774  }
3775 
3776  case ARM::LDMIA_RET:
3777  case ARM::LDMIA:
3778  case ARM::LDMDA:
3779  case ARM::LDMDB:
3780  case ARM::LDMIB:
3781  case ARM::LDMIA_UPD:
3782  case ARM::LDMDA_UPD:
3783  case ARM::LDMDB_UPD:
3784  case ARM::LDMIB_UPD:
3785  case ARM::STMIA:
3786  case ARM::STMDA:
3787  case ARM::STMDB:
3788  case ARM::STMIB:
3789  case ARM::STMIA_UPD:
3790  case ARM::STMDA_UPD:
3791  case ARM::STMDB_UPD:
3792  case ARM::STMIB_UPD:
3793  case ARM::tLDMIA:
3794  case ARM::tLDMIA_UPD:
3795  case ARM::tSTMIA_UPD:
3796  case ARM::tPOP_RET:
3797  case ARM::tPOP:
3798  case ARM::tPUSH:
3799  case ARM::t2LDMIA_RET:
3800  case ARM::t2LDMIA:
3801  case ARM::t2LDMDB:
3802  case ARM::t2LDMIA_UPD:
3803  case ARM::t2LDMDB_UPD:
3804  case ARM::t2STMIA:
3805  case ARM::t2STMDB:
3806  case ARM::t2STMIA_UPD:
3807  case ARM::t2STMDB_UPD: {
3808  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
 3809  switch (Subtarget.getLdStMultipleTiming()) {
 3810  case ARMSubtarget::SingleIssuePlusExtras:
 3811  return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
 3812  case ARMSubtarget::SingleIssue:
 3813  // Assume the worst.
 3814  return NumRegs;
 3815  case ARMSubtarget::DoubleIssue: {
 3816  if (NumRegs < 4)
 3817  return 2;
 3818  // 4 registers would be issued: 2, 2.
 3819  // 5 registers would be issued: 2, 2, 1.
 3820  unsigned UOps = (NumRegs / 2);
 3821  if (NumRegs % 2)
 3822  ++UOps;
 3823  return UOps;
 3824  }
 3825  case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
 3826  unsigned UOps = (NumRegs / 2);
 3827  // If there is an odd number of registers or if it's not 64-bit aligned,
 3828  // then it takes an extra AGU (Address Generation Unit) cycle.
3829  if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3830  (*MI.memoperands_begin())->getAlign() < Align(8))
3831  ++UOps;
3832  return UOps;
3833  }
3834  }
3835  }
3836  }
3837  llvm_unreachable("Didn't find the number of microops");
3838 }
3839 
3840 int
3841 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3842  const MCInstrDesc &DefMCID,
3843  unsigned DefClass,
3844  unsigned DefIdx, unsigned DefAlign) const {
3845  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3846  if (RegNo <= 0)
3847  // Def is the address writeback.
3848  return ItinData->getOperandCycle(DefClass, DefIdx);
3849 
3850  int DefCycle;
3851  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3852  // (regno / 2) + (regno % 2) + 1
3853  DefCycle = RegNo / 2 + 1;
3854  if (RegNo % 2)
3855  ++DefCycle;
3856  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3857  DefCycle = RegNo;
3858  bool isSLoad = false;
3859 
3860  switch (DefMCID.getOpcode()) {
3861  default: break;
3862  case ARM::VLDMSIA:
3863  case ARM::VLDMSIA_UPD:
3864  case ARM::VLDMSDB_UPD:
3865  isSLoad = true;
3866  break;
3867  }
3868 
 3869  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
 3870  // then it takes an extra cycle.
3871  if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3872  ++DefCycle;
3873  } else {
3874  // Assume the worst.
3875  DefCycle = RegNo + 2;
3876  }
3877 
3878  return DefCycle;
3879 }
3880 
3881 int
3882 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3883  const MCInstrDesc &DefMCID,
3884  unsigned DefClass,
3885  unsigned DefIdx, unsigned DefAlign) const {
3886  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3887  if (RegNo <= 0)
3888  // Def is the address writeback.
3889  return ItinData->getOperandCycle(DefClass, DefIdx);
3890 
3891  int DefCycle;
3892  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3893  // 4 registers would be issued: 1, 2, 1.
3894  // 5 registers would be issued: 1, 2, 2.
3895  DefCycle = RegNo / 2;
3896  if (DefCycle < 1)
3897  DefCycle = 1;
3898  // Result latency is issue cycle + 2: E2.
3899  DefCycle += 2;
3900  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3901  DefCycle = (RegNo / 2);
 3902  // If there is an odd number of registers or if it's not 64-bit aligned,
 3903  // then it takes an extra AGU (Address Generation Unit) cycle.
3904  if ((RegNo % 2) || DefAlign < 8)
3905  ++DefCycle;
3906  // Result latency is AGU cycles + 2.
3907  DefCycle += 2;
3908  } else {
3909  // Assume the worst.
3910  DefCycle = RegNo + 2;
3911  }
3912 
3913  return DefCycle;
3914 }
3915 
3916 int
3917 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3918  const MCInstrDesc &UseMCID,
3919  unsigned UseClass,
3920  unsigned UseIdx, unsigned UseAlign) const {
3921  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3922  if (RegNo <= 0)
3923  return ItinData->getOperandCycle(UseClass, UseIdx);
3924 
3925  int UseCycle;
3926  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3927  // (regno / 2) + (regno % 2) + 1
3928  UseCycle = RegNo / 2 + 1;
3929  if (RegNo % 2)
3930  ++UseCycle;
3931  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3932  UseCycle = RegNo;
3933  bool isSStore = false;
3934 
3935  switch (UseMCID.getOpcode()) {
3936  default: break;
3937  case ARM::VSTMSIA:
3938  case ARM::VSTMSIA_UPD:
3939  case ARM::VSTMSDB_UPD:
3940  isSStore = true;
3941  break;
3942  }
3943 
 3944  // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
 3945  // then it takes an extra cycle.
3946  if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3947  ++UseCycle;
3948  } else {
3949  // Assume the worst.
3950  UseCycle = RegNo + 2;
3951  }
3952 
3953  return UseCycle;
3954 }
3955 
3956 int
3957 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3958  const MCInstrDesc &UseMCID,
3959  unsigned UseClass,
3960  unsigned UseIdx, unsigned UseAlign) const {
3961  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3962  if (RegNo <= 0)
3963  return ItinData->getOperandCycle(UseClass, UseIdx);
3964 
3965  int UseCycle;
3966  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3967  UseCycle = RegNo / 2;
3968  if (UseCycle < 2)
3969  UseCycle = 2;
3970  // Read in E3.
3971  UseCycle += 2;
3972  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3973  UseCycle = (RegNo / 2);
 3974  // If there is an odd number of registers or if it's not 64-bit aligned,
 3975  // then it takes an extra AGU (Address Generation Unit) cycle.
3976  if ((RegNo % 2) || UseAlign < 8)
3977  ++UseCycle;
3978  } else {
3979  // Assume the worst.
3980  UseCycle = 1;
3981  }
3982  return UseCycle;
3983 }
3984 
 3985 int
 3986 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
 3987  const MCInstrDesc &DefMCID,
3988  unsigned DefIdx, unsigned DefAlign,
3989  const MCInstrDesc &UseMCID,
3990  unsigned UseIdx, unsigned UseAlign) const {
3991  unsigned DefClass = DefMCID.getSchedClass();
3992  unsigned UseClass = UseMCID.getSchedClass();
3993 
3994  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3995  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3996 
 3997  // This may be a def / use of a variable_ops instruction; the operand
 3998  // latency might be determinable dynamically. Let the target try to
3999  // figure it out.
4000  int DefCycle = -1;
4001  bool LdmBypass = false;
4002  switch (DefMCID.getOpcode()) {
4003  default:
4004  DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4005  break;
4006 
4007  case ARM::VLDMDIA:
4008  case ARM::VLDMDIA_UPD:
4009  case ARM::VLDMDDB_UPD:
4010  case ARM::VLDMSIA:
4011  case ARM::VLDMSIA_UPD:
4012  case ARM::VLDMSDB_UPD:
4013  DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4014  break;
4015 
4016  case ARM::LDMIA_RET:
4017  case ARM::LDMIA:
4018  case ARM::LDMDA:
4019  case ARM::LDMDB:
4020  case ARM::LDMIB:
4021  case ARM::LDMIA_UPD:
4022  case ARM::LDMDA_UPD:
4023  case ARM::LDMDB_UPD:
4024  case ARM::LDMIB_UPD:
4025  case ARM::tLDMIA:
4026  case ARM::tLDMIA_UPD:
4027  case ARM::tPUSH:
4028  case ARM::t2LDMIA_RET:
4029  case ARM::t2LDMIA:
4030  case ARM::t2LDMDB:
4031  case ARM::t2LDMIA_UPD:
4032  case ARM::t2LDMDB_UPD:
4033  LdmBypass = true;
4034  DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4035  break;
4036  }
4037 
4038  if (DefCycle == -1)
 4039  // We can't seem to determine the result latency of the def; assume it's 2.
4040  DefCycle = 2;
4041 
4042  int UseCycle = -1;
4043  switch (UseMCID.getOpcode()) {
4044  default:
4045  UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
4046  break;
4047 
4048  case ARM::VSTMDIA:
4049  case ARM::VSTMDIA_UPD:
4050  case ARM::VSTMDDB_UPD:
4051  case ARM::VSTMSIA:
4052  case ARM::VSTMSIA_UPD:
4053  case ARM::VSTMSDB_UPD:
4054  UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4055  break;
4056 
4057  case ARM::STMIA:
4058  case ARM::STMDA:
4059  case ARM::STMDB:
4060  case ARM::STMIB:
4061  case ARM::STMIA_UPD:
4062  case ARM::STMDA_UPD:
4063  case ARM::STMDB_UPD:
4064  case ARM::STMIB_UPD:
4065  case ARM::tSTMIA_UPD:
4066  case ARM::tPOP_RET:
4067  case ARM::tPOP:
4068  case ARM::t2STMIA:
4069  case ARM::t2STMDB:
4070  case ARM::t2STMIA_UPD:
4071  case ARM::t2STMDB_UPD:
4072  UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4073  break;
4074  }
4075 
4076  if (UseCycle == -1)
4077  // Assume it's read in the first stage.
4078  UseCycle = 1;
4079 
4080  UseCycle = DefCycle - UseCycle + 1;
4081  if (UseCycle > 0) {
4082  if (LdmBypass) {
4083  // It's a variable_ops instruction so we can't use DefIdx here. Just use
4084  // first def operand.
4085  if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
4086  UseClass, UseIdx))
4087  --UseCycle;
4088  } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
4089  UseClass, UseIdx)) {
4090  --UseCycle;
4091  }
4092  }
4093 
4094  return UseCycle;
4095 }
4096 
 4097 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
 4098  const MachineInstr *MI, unsigned Reg,
4099  unsigned &DefIdx, unsigned &Dist) {
4100  Dist = 0;
4101 
4103  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
4104  assert(II->isInsideBundle() && "Empty bundle?");
4105 
4106  int Idx = -1;
4107  while (II->isInsideBundle()) {
4108  Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
4109  if (Idx != -1)
4110  break;
4111  --II;
4112  ++Dist;
4113  }
4114 
4115  assert(Idx != -1 && "Cannot find bundled definition!");
4116  DefIdx = Idx;
4117  return &*II;
4118 }
4119 
 4120 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
 4121  const MachineInstr &MI, unsigned Reg,
4122  unsigned &UseIdx, unsigned &Dist) {
4123  Dist = 0;
4124 
4125  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
4126  assert(II->isInsideBundle() && "Empty bundle?");
4127  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4128 
4129  // FIXME: This doesn't properly handle multiple uses.
4130  int Idx = -1;
4131  while (II != E && II->isInsideBundle()) {
4132  Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
4133  if (Idx != -1)
4134  break;
4135  if (II->getOpcode() != ARM::t2IT)
4136  ++Dist;
4137  ++II;
4138  }
4139 
4140  if (Idx == -1) {
4141  Dist = 0;
4142  return nullptr;
4143  }
4144 
4145  UseIdx = Idx;
4146  return &*II;
4147 }
4148 
4149 /// Return the number of cycles to add to (or subtract from) the static
4150 /// itinerary based on the def opcode and alignment. The caller will ensure that
4151 /// adjusted latency is at least one cycle.
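/// E.g. (illustrative): on Swift, an LDRrs whose shifter operand encodes a
/// plain [r + r] access (no subtract, ShImm == 0) has its latency reduced by
/// two cycles below, while an lsr #1 form is only one cycle cheaper.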
4152 static int adjustDefLatency(const ARMSubtarget &Subtarget,
4153  const MachineInstr &DefMI,
4154  const MCInstrDesc &DefMCID, unsigned DefAlign) {
4155  int Adjust = 0;
4156  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4157  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4158  // variants are one cycle cheaper.
4159  switch (DefMCID.getOpcode()) {
4160  default: break;
4161  case ARM::LDRrs:
4162  case ARM::LDRBrs: {
4163  unsigned ShOpVal = DefMI.getOperand(3).getImm();
4164  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4165  if (ShImm == 0 ||
4166  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4167  --Adjust;
4168  break;
4169  }
4170  case ARM::t2LDRs:
4171  case ARM::t2LDRBs:
4172  case ARM::t2LDRHs:
4173  case ARM::t2LDRSHs: {
4174  // Thumb2 mode: lsl only.
4175  unsigned ShAmt = DefMI.getOperand(3).getImm();
4176  if (ShAmt == 0 || ShAmt == 2)
4177  --Adjust;
4178  break;
4179  }
4180  }
4181  } else if (Subtarget.isSwift()) {
4182  // FIXME: Properly handle all of the latency adjustments for address
4183  // writeback.
4184  switch (DefMCID.getOpcode()) {
4185  default: break;
4186  case ARM::LDRrs:
4187  case ARM::LDRBrs: {
4188  unsigned ShOpVal = DefMI.getOperand(3).getImm();
4189  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4190  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4191  if (!isSub &&
4192  (ShImm == 0 ||
4193  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4194  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4195  Adjust -= 2;
4196  else if (!isSub &&
4197  ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4198  --Adjust;
4199  break;
4200  }
4201  case ARM::t2LDRs:
4202  case ARM::t2LDRBs:
4203  case ARM::t2LDRHs:
4204  case ARM::t2LDRSHs: {
4205  // Thumb2 mode: lsl only.
4206  unsigned ShAmt = DefMI.getOperand(3).getImm();
4207  if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4208  Adjust -= 2;
4209  break;
4210  }
4211  }
4212  }
4213 
4214  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4215  switch (DefMCID.getOpcode()) {
4216  default: break;
4217  case ARM::VLD1q8:
4218  case ARM::VLD1q16:
4219  case ARM::VLD1q32:
4220  case ARM::VLD1q64:
4221  case ARM::VLD1q8wb_fixed:
4222  case ARM::VLD1q16wb_fixed:
4223  case ARM::VLD1q32wb_fixed:
4224  case ARM::VLD1q64wb_fixed:
4225  case ARM::VLD1q8wb_register:
4226  case ARM::VLD1q16wb_register:
4227  case ARM::VLD1q32wb_register:
4228  case ARM::VLD1q64wb_register:
4229  case ARM::VLD2d8:
4230  case ARM::VLD2d16:
4231  case ARM::VLD2d32:
4232  case ARM::VLD2q8:
4233  case ARM::VLD2q16:
4234  case ARM::VLD2q32:
4235  case ARM::VLD2d8wb_fixed:
4236  case ARM::VLD2d16wb_fixed:
4237  case ARM::VLD2d32wb_fixed:
4238  case ARM::VLD2q8wb_fixed:
4239  case ARM::VLD2q16wb_fixed:
4240  case ARM::VLD2q32wb_fixed:
4241  case ARM::VLD2d8wb_register:
4242  case ARM::VLD2d16wb_register:
4243  case ARM::VLD2d32wb_register:
4244  case ARM::VLD2q8wb_register:
4245  case ARM::VLD2q16wb_register:
4246  case ARM::VLD2q32wb_register:
4247  case ARM::VLD3d8:
4248  case ARM::VLD3d16:
4249  case ARM::VLD3d32:
4250  case ARM::VLD1d64T:
4251  case ARM::VLD3d8_UPD:
4252  case ARM::VLD3d16_UPD:
4253  case ARM::VLD3d32_UPD:
4254  case ARM::VLD1d64Twb_fixed:
4255  case ARM::VLD1d64Twb_register:
4256  case ARM::VLD3q8_UPD:
4257  case ARM::VLD3q16_UPD:
4258  case ARM::VLD3q32_UPD:
4259  case ARM::VLD4d8:
4260  case ARM::VLD4d16:
4261  case ARM::VLD4d32:
4262  case ARM::VLD1d64Q:
4263  case ARM::VLD4d8_UPD:
4264  case ARM::VLD4d16_UPD:
4265  case ARM::VLD4d32_UPD:
4266  case ARM::VLD1d64Qwb_fixed:
4267  case ARM::VLD1d64Qwb_register:
4268  case ARM::VLD4q8_UPD:
4269  case ARM::VLD4q16_UPD:
4270  case ARM::VLD4q32_UPD:
4271  case ARM::VLD1DUPq8:
4272  case ARM::VLD1DUPq16:
4273  case ARM::VLD1DUPq32:
4274  case ARM::VLD1DUPq8wb_fixed:
4275  case ARM::VLD1DUPq16wb_fixed:
4276  case ARM::VLD1DUPq32wb_fixed:
4277  case ARM::VLD1DUPq8wb_register:
4278  case ARM::VLD1DUPq16wb_register:
4279  case ARM::VLD1DUPq32wb_register:
4280  case ARM::VLD2DUPd8:
4281  case ARM::VLD2DUPd16:
4282  case ARM::VLD2DUPd32:
4283  case ARM::VLD2DUPd8wb_fixed:
4284  case ARM::VLD2DUPd16wb_fixed:
4285  case ARM::VLD2DUPd32wb_fixed:
4286  case ARM::VLD2DUPd8wb_register:
4287  case ARM::VLD2DUPd16wb_register:
4288  case ARM::VLD2DUPd32wb_register:
4289  case ARM::VLD4DUPd8:
4290  case ARM::VLD4DUPd16:
4291  case ARM::VLD4DUPd32:
4292  case ARM::VLD4DUPd8_UPD:
4293  case ARM::VLD4DUPd16_UPD:
4294  case ARM::VLD4DUPd32_UPD:
4295  case ARM::VLD1LNd8:
4296  case ARM::VLD1LNd16:
4297  case ARM::VLD1LNd32:
4298  case ARM::VLD1LNd8_UPD:
4299  case ARM::VLD1LNd16_UPD:
4300  case ARM::VLD1LNd32_UPD:
4301  case ARM::VLD2LNd8:
4302  case ARM::VLD2LNd16:
4303  case ARM::VLD2LNd32:
4304  case ARM::VLD2LNq16:
4305  case ARM::VLD2LNq32:
4306  case ARM::VLD2LNd8_UPD:
4307  case ARM::VLD2LNd16_UPD:
4308  case ARM::VLD2LNd32_UPD:
4309  case ARM::VLD2LNq16_UPD:
4310  case ARM::VLD2LNq32_UPD:
4311  case ARM::VLD4LNd8:
4312  case ARM::VLD4LNd16:
4313  case ARM::VLD4LNd32:
4314  case ARM::VLD4LNq16:
4315  case ARM::VLD4LNq32:
4316  case ARM::VLD4LNd8_UPD:
4317  case ARM::VLD4LNd16_UPD:
4318  case ARM::VLD4LNd32_UPD:
4319  case ARM::VLD4LNq16_UPD:
4320  case ARM::VLD4LNq32_UPD:
 4321  // If the address is not 64-bit aligned, the latencies of these
 4322  // instructions increase by one.
4323  ++Adjust;
4324  break;
4325  }
4326  }
4327  return Adjust;
4328 }
4329 
 4330 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
 4331  const MachineInstr &DefMI,
4332  unsigned DefIdx,
4333  const MachineInstr &UseMI,
4334  unsigned UseIdx) const {
4335  // No operand latency. The caller may fall back to getInstrLatency.
4336  if (!ItinData || ItinData->isEmpty())
4337  return -1;
4338 
4339  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4340  Register Reg = DefMO.getReg();
4341 
4342  const MachineInstr *ResolvedDefMI = &DefMI;
4343  unsigned DefAdj = 0;
4344  if (DefMI.isBundle())
4345  ResolvedDefMI =
4346  getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4347  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4348  ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4349  return 1;
4350  }
4351 
4352  const MachineInstr *ResolvedUseMI = &UseMI;
4353  unsigned UseAdj = 0;
4354  if (UseMI.isBundle()) {
4355  ResolvedUseMI =
4356  getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4357  if (!ResolvedUseMI)
4358  return -1;
4359  }
4360 
4361  return getOperandLatencyImpl(
4362  ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4363  Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4364 }
4365 
4366 int ARMBaseInstrInfo::getOperandLatencyImpl(
4367  const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4368  unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4369  const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4370  unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4371  if (Reg == ARM::CPSR) {
4372  if (DefMI.getOpcode() == ARM::FMSTAT) {
4373  // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4374  return Subtarget.isLikeA9() ? 1 : 20;
4375  }
4376 
4377  // CPSR set and branch can be paired in the same cycle.
4378  if (UseMI.isBranch())
4379  return 0;
4380 
4381  // Otherwise it takes the instruction latency (generally one).
4382  unsigned Latency = getInstrLatency(ItinData, DefMI);
4383 
 4384  // For Thumb2 and -Os, prefer scheduling the CPSR-setting instruction close to
 4385  // its uses. Instructions that are otherwise scheduled between them may
 4386  // incur a code-size penalty (they prevent use of the CPSR-setting 16-bit
 4387  // instructions).
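 // E.g. (illustrative): with -Os on Thumb2, the latency returned for a
 // flag-setting def feeding a nearby use is reduced by one below, nudging
 // the scheduler to keep the CPSR def and its consumer adjacent.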
4388  if (Latency > 0 && Subtarget.isThumb2()) {
4389  const MachineFunction *MF = DefMI.getParent()->getParent();
4390  // FIXME: Use Function::hasOptSize().
4391  if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4392  --Latency;
4393  }
4394  return Latency;
4395  }
4396 
4397  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4398  return -1;
4399 
4400  unsigned DefAlign = DefMI.hasOneMemOperand()
4401  ? (*DefMI.memoperands_begin())->getAlign().value()
4402  : 0;
4403  unsigned UseAlign = UseMI.hasOneMemOperand()
4404  ? (*UseMI.memoperands_begin())->getAlign().value()
4405  : 0;
4406 
4407  // Get the itinerary's latency if possible, and handle variable_ops.
4408  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
4409  UseIdx, UseAlign);
4410  // Unable to find operand latency. The caller may resort to getInstrLatency.
4411  if (Latency < 0)
4412  return Latency;
4413 
4414  // Adjust for IT block position.
4415  int Adj = DefAdj + UseAdj;
4416 
4417  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4418  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4419  if (Adj >= 0 || (int)Latency > -Adj) {
4420  return Latency + Adj;
4421  }
4422  // Return the itinerary latency, which may be zero but not less than zero.
4423  return Latency;
4424 }
4425 
 4426 int
 4427 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
 4428  SDNode *DefNode, unsigned DefIdx,
4429  SDNode *UseNode, unsigned UseIdx) const {
4430  if (!DefNode->isMachineOpcode())
4431  return 1;
4432 
4433  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4434 
4435  if (isZeroCost(DefMCID.Opcode))
4436  return 0;
4437 
4438  if (!ItinData || ItinData->isEmpty())
4439  return DefMCID.mayLoad() ? 3 : 1;
4440 
4441  if (!UseNode->isMachineOpcode()) {
4442  int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4443  int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4444  int Threshold = 1 + Adj;
4445  return Latency <= Threshold ? 1 : Latency - Adj;
4446  }
4447 
4448  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4449  auto *DefMN = cast<MachineSDNode>(DefNode);
4450  unsigned DefAlign = !DefMN->memoperands_empty()
4451  ? (*DefMN->memoperands_begin())->getAlign().value()
4452  : 0;
4453  auto *UseMN = cast<MachineSDNode>(UseNode);
4454  unsigned UseAlign = !UseMN->memoperands_empty()
4455  ? (*UseMN->memoperands_begin())->getAlign().value()
4456  : 0;
4457  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4458  UseMCID, UseIdx, UseAlign);
4459 
4460  if (Latency > 1 &&
4461  (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4462  Subtarget.isCortexA7())) {
4463  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4464  // variants are one cycle cheaper.
4465  switch (DefMCID.getOpcode()) {
4466  default: break;
4467  case ARM::LDRrs:
4468  case ARM::LDRBrs: {
4469  unsigned ShOpVal =
4470  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4471  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4472  if (ShImm == 0 ||
4473  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4474  --Latency;
4475  break;
4476  }
4477  case ARM::t2LDRs:
4478  case ARM::t2LDRBs:
4479  case ARM::t2LDRHs:
4480  case ARM::t2LDRSHs: {
4481  // Thumb2 mode: lsl only.
4482  unsigned ShAmt =
4483  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4484  if (ShAmt == 0 || ShAmt == 2)
4485  --Latency;
4486  break;
4487  }
4488  }
4489  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4490  // FIXME: Properly handle all of the latency adjustments for address
4491  // writeback.
4492  switch (DefMCID.getOpcode()) {
4493  default: break;
4494  case ARM::LDRrs:
4495  case ARM::LDRBrs: {
4496  unsigned ShOpVal =
4497  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4498  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4499  if (ShImm == 0 ||
4500  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4501  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4502  Latency -= 2;
4503  else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4504  --Latency;
4505  break;
4506  }
4507  case ARM::t2LDRs:
4508  case ARM::t2LDRBs:
4509  case ARM::t2LDRHs:
4510  case ARM::t2LDRSHs:
4511  // Thumb2 mode: lsl 0-3 only.
4512  Latency -= 2;
4513  break;
4514  }
4515  }
4516 
4517  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4518  switch (DefMCID.getOpcode()) {
4519  default: break;
4520  case ARM::VLD1q8:
4521  case ARM::VLD1q16:
4522  case ARM::VLD1q32:
4523  case ARM::VLD1q64:
4524  case ARM::VLD1q8wb_register:
4525  case ARM::VLD1q16wb_register:
4526  case ARM::VLD1q32wb_register:
4527  case ARM::VLD1q64wb_register:
4528  case ARM::VLD1q8wb_fixed:
4529  case ARM::VLD1q16wb_fixed:
4530  case ARM::VLD1q32wb_fixed:
4531  case ARM::VLD1q64wb_fixed:
4532  case ARM::VLD2d8:
4533  case ARM::VLD2d16:
4534  case ARM::VLD2d32:
4535  case ARM::VLD2q8Pseudo:
4536  case ARM::VLD2q16Pseudo:
4537  case ARM::VLD2q32Pseudo:
4538  case ARM::VLD2d8wb_fixed:
4539  case ARM::VLD2d16wb_fixed:
4540  case ARM::VLD2d32wb_fixed:
4541  case ARM::VLD2q8PseudoWB_fixed:
4542  case ARM::VLD2q16PseudoWB_fixed:
4543  case ARM::VLD2q32PseudoWB_fixed:
4544  case ARM::VLD2d8wb_register:
4545  case ARM::VLD2d16wb_register:
4546  case ARM::VLD2d32wb_register:
4547  case ARM::VLD2q8PseudoWB_register:
4548  case ARM::VLD2q16PseudoWB_register:
4549  case ARM::VLD2q32PseudoWB_register:
4550  case ARM::VLD3d8Pseudo:
4551  case ARM::VLD3d16Pseudo:
4552  case ARM::VLD3d32Pseudo:
4553  case ARM::VLD1d8TPseudo:
4554  case ARM::VLD1d16TPseudo:
4555  case ARM::VLD1d32TPseudo:
4556  case ARM::VLD1d64TPseudo:
4557  case ARM::VLD1d64TPseudoWB_fixed:
4558  case ARM::VLD1d64TPseudoWB_register:
4559  case ARM::VLD3d8Pseudo_UPD:
4560  case ARM::VLD3d16Pseudo_UPD:
4561  case ARM::VLD3d32Pseudo_UPD:
4562  case ARM::VLD3q8Pseudo_UPD:
4563  case ARM::VLD3q16Pseudo_UPD:
4564  case ARM::VLD3q32Pseudo_UPD:
4565  case ARM::VLD3q8oddPseudo:
4566  case ARM::VLD3q16oddPseudo:
4567  case ARM::VLD3q32oddPseudo:
4568  case ARM::VLD3q8oddPseudo_UPD:
4569  case ARM::VLD3q16oddPseudo_UPD:
4570  case ARM::VLD3q32oddPseudo_UPD:
4571  case ARM::VLD4d8Pseudo:
4572  case ARM::VLD4d16Pseudo:
4573  case ARM::VLD4d32Pseudo:
4574  case ARM::VLD1d8QPseudo:
4575  case ARM::VLD1d16QPseudo:
4576  case ARM::VLD1d32QPseudo:
4577  case ARM::VLD1d64QPseudo:
4578  case ARM::VLD1d64QPseudoWB_fixed:
4579  case ARM::VLD1d64QPseudoWB_register:
4580  case ARM::VLD1q8HighQPseudo:
4581  case ARM::VLD1q8LowQPseudo_UPD:
4582  case ARM::VLD1q8HighTPseudo:
4583  case ARM::VLD1q8LowTPseudo_UPD:
4584  case ARM::VLD1q16HighQPseudo:
4585  case ARM::VLD1q16LowQPseudo_UPD:
4586  case ARM::VLD1q16HighTPseudo:
4587  case ARM::VLD1q16LowTPseudo_UPD:
4588  case ARM::VLD1q32HighQPseudo:
4589  case ARM::VLD1q32LowQPseudo_UPD:
4590  case ARM::VLD1q32HighTPseudo:
4591  case ARM::VLD1q32LowTPseudo_UPD:
4592  case ARM::VLD1q64HighQPseudo:
4593  case ARM::VLD1q64LowQPseudo_UPD:
4594  case ARM::VLD1q64HighTPseudo:
4595  case ARM::VLD1q64LowTPseudo_UPD:
4596  case ARM::VLD4d8Pseudo_UPD:
4597  case ARM::VLD4d16Pseudo_UPD:
4598  case ARM::VLD4d32Pseudo_UPD:
4599  case ARM::VLD4q8Pseudo_UPD:
4600  case ARM::VLD4q16Pseudo_UPD:
4601  case ARM::VLD4q32Pseudo_UPD:
4602  case ARM::VLD4q8oddPseudo:
4603  case ARM::VLD4q16oddPseudo:
4604  case ARM::VLD4q32oddPseudo:
4605  case ARM::VLD4q8oddPseudo_UPD:
4606  case ARM::VLD4q16oddPseudo_UPD:
4607  case ARM::VLD4q32oddPseudo_UPD:
4608  case ARM::VLD1DUPq8:
4609  case ARM::VLD1DUPq16:
4610  case ARM::VLD1DUPq32:
4611  case ARM::VLD1DUPq8wb_fixed:
4612  case ARM::VLD1DUPq16wb_fixed:
4613  case ARM::VLD1DUPq32wb_fixed:
4614  case ARM::VLD1DUPq8wb_register:
4615  case ARM::VLD1DUPq16wb_register:
4616  case ARM::VLD1DUPq32wb_register:
4617  case ARM::VLD2DUPd8:
4618  case ARM::VLD2DUPd16:
4619  case ARM::VLD2DUPd32:
4620  case ARM::VLD2DUPd8wb_fixed:
4621  case ARM::VLD2DUPd16wb_fixed:
4622  case ARM::VLD2DUPd32wb_fixed:
4623  case ARM::VLD2DUPd8wb_register:
4624  case ARM::VLD2DUPd16wb_register:
4625  case ARM::VLD2DUPd32wb_register:
4626  case ARM::VLD2DUPq8EvenPseudo:
4627  case ARM::VLD2DUPq8OddPseudo:
4628  case ARM::VLD2DUPq16EvenPseudo:
4629  case ARM::VLD2DUPq16OddPseudo:
4630  case ARM::VLD2DUPq32EvenPseudo:
4631  case ARM::VLD2DUPq32OddPseudo:
4632  case ARM::VLD3DUPq8EvenPseudo:
4633  case ARM::VLD3DUPq8OddPseudo:
4634  case ARM::VLD3DUPq16EvenPseudo:
4635  case ARM::VLD3DUPq16OddPseudo:
4636  case ARM::VLD3DUPq32EvenPseudo:
4637  case ARM::VLD3DUPq32OddPseudo:
4638  case ARM::VLD4DUPd8Pseudo:
4639  case ARM::VLD4DUPd16Pseudo:
4640  case ARM::VLD4DUPd32Pseudo:
4641  case ARM::VLD4DUPd8Pseudo_UPD:
4642  case ARM::VLD4DUPd16Pseudo_UPD:
4643  case ARM::VLD4DUPd32Pseudo_UPD:
4644  case ARM::VLD4DUPq8EvenPseudo:
4645  case ARM::VLD4DUPq8OddPseudo:
4646  case ARM::VLD4DUPq16EvenPseudo:
4647  case ARM::VLD4DUPq16OddPseudo:
4648  case ARM::VLD4DUPq32EvenPseudo:
4649  case ARM::VLD4DUPq32OddPseudo:
4650  case ARM::VLD1LNq8Pseudo:
4651  case ARM::VLD1LNq16Pseudo:
4652  case ARM::VLD1LNq32Pseudo:
4653  case ARM::VLD1LNq8Pseudo_UPD:
4654  case ARM::VLD1LNq16Pseudo_UPD:
4655  case ARM::VLD1LNq32Pseudo_UPD:
4656  case ARM::VLD2LNd8Pseudo:
4657  case ARM::VLD2LNd16Pseudo:
4658  case ARM::VLD2LNd32Pseudo:
4659  case ARM::VLD2LNq16Pseudo:
4660  case ARM::VLD2LNq32Pseudo:
4661  case ARM::VLD2LNd8Pseudo_UPD:
4662  case ARM::VLD2LNd16Pseudo_UPD:
4663  case ARM::VLD2LNd32Pseudo_UPD:
4664  case ARM::VLD2LNq16Pseudo_UPD:
4665  case ARM::VLD2LNq32Pseudo_UPD:
4666  case ARM::VLD4LNd8Pseudo:
4667  case ARM::VLD4LNd16Pseudo:
4668  case ARM::VLD4LNd32Pseudo:
4669  case ARM::VLD4LNq16Pseudo:
4670  case ARM::VLD4LNq32Pseudo:
4671  case ARM::VLD4LNd8Pseudo_UPD:
4672  case ARM::VLD4LNd16Pseudo_UPD:
4673  case ARM::VLD4LNd32Pseudo_UPD:
4674  case ARM::VLD4LNq16Pseudo_UPD:
4675  case ARM::VLD4LNq32Pseudo_UPD:
 4676  // If the address is not 64-bit aligned, the latencies of these
 4677  // instructions increase by one.
4678  ++Latency;
4679  break;
4680  }
4681 
4682  return Latency;
4683 }
4684 
4685 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4686  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4687  MI.isImplicitDef())
4688  return 0;
4689 
4690  if (MI.isBundle())
4691  return 0;
4692 
4693  const MCInstrDesc &MCID = MI.getDesc();
4694 
4695  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4696  !Subtarget.cheapPredicableCPSRDef())) {
 4697  // When predicated, CPSR is an additional source operand for CPSR-updating
 4698  // instructions; this apparently increases their latencies.
4699  return 1;
4700  }
4701  return 0;
4702 }
4703 
4704 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4705  const MachineInstr &MI,
4706  unsigned *PredCost) const {
4707  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4708  MI.isImplicitDef())
4709  return 1;
4710 
4711  // An instruction scheduler typically runs on unbundled instructions, however
4712  // other passes may query the latency of a bundled instruction.
4713  if (MI.isBundle()) {
4714  unsigned Latency = 0;
 4715  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
 4716  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4717  while (++I != E && I->isInsideBundle()) {
4718  if (I->getOpcode() != ARM::t2IT)
4719  Latency += getInstrLatency(ItinData, *I, PredCost);
4720  }
4721  return Latency;
4722  }
4723 
4724  const MCInstrDesc &MCID = MI.getDesc();
4725  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4726  !Subtarget.cheapPredicableCPSRDef()))) {
 4727  // When predicated, CPSR is an additional source operand for CPSR-updating
 4728  // instructions; this apparently increases their latencies.
4729  *PredCost = 1;
4730  }
4731  // Be sure to call getStageLatency for an empty itinerary in case it has a
4732  // valid MinLatency property.
4733  if (!ItinData)
4734  return MI.mayLoad() ? 3 : 1;
4735 
4736  unsigned Class = MCID.getSchedClass();
4737 
4738  // For instructions with variable uops, use uops as latency.
4739  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4740  return getNumMicroOps(ItinData, MI);
4741 
4742  // For the common case, fall back on the itinerary's latency.
4743  unsigned Latency = ItinData->getStageLatency(Class);
4744 
4745  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4746  unsigned DefAlign =
4747  MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4748  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4749  if (Adj >= 0 || (int)Latency > -Adj) {
4750  return Latency + Adj;
4751  }
4752  return Latency;
4753 }
4754 
4755 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4756  SDNode *Node) const {
4757  if (!Node->isMachineOpcode())
4758  return 1;
4759 
4760  if (!ItinData || ItinData->isEmpty())
4761  return 1;
4762 
4763  unsigned Opcode = Node->getMachineOpcode();
4764  switch (Opcode) {
4765  default:
4766  return ItinData->getStageLatency(get(Opcode).getSchedClass());
4767  case ARM::VLDMQIA:
4768  case ARM::VSTMQIA:
4769  return 2;
4770  }
4771 }
4772 
4773 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4774  const MachineRegisterInfo *MRI,
4775  const MachineInstr &DefMI,
4776  unsigned DefIdx,
4777  const MachineInstr &UseMI,
4778  unsigned UseIdx) const {
4779  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4780  unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4781  if (Subtarget.nonpipelinedVFP() &&
4782  (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4783  return true;
4784 
4785  // Hoist VFP / NEON instructions with 4 or higher latency.
4786  unsigned Latency =
4787  SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4788  if (Latency <= 3)
4789  return false;
4790  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4791  UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4792 }
4793 
4794 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4795  const MachineInstr &DefMI,
4796  unsigned DefIdx) const {
4797  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4798  if (!ItinData || ItinData->isEmpty())
4799  return false;
4800 
4801  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4802  if (DDomain == ARMII::DomainGeneral) {
4803  unsigned DefClass = DefMI.getDesc().getSchedClass();
4804  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4805  return (DefCycle != -1 && DefCycle <= 2);
4806  }
4807  return false;
4808 }
4809 
4810 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4811  StringRef &ErrInfo) const {
4812  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4813  ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4814  return false;
4815  }
4816  if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4817  // Make sure we don't generate a lo-lo mov that isn't supported.
4818  if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4819  !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4820  ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4821  return false;
4822  }
4823  }
4824  if (MI.getOpcode() == ARM::tPUSH ||
4825  MI.getOpcode() == ARM::tPOP ||
4826  MI.getOpcode() == ARM::tPOP_RET) {
4827  for (int i = 2, e = MI.getNumOperands(); i < e; ++i) {
4828  if (MI.getOperand(i).isImplicit() ||
4829  !MI.getOperand(i).isReg())
4830  continue;
4831  Register Reg = MI.getOperand(i).getReg();
4832  if (Reg < ARM::R0 || Reg > ARM::R7) {
4833  if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4834  !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4835  ErrInfo = "Unsupported register in Thumb1 push/pop";
4836  return false;
4837  }
4838  }
4839  }
4840  }
4841  if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4842  assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4843  if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4844  MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4845  ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4846  return false;
4847  }
4848  }
4849  return true;
4850 }
4851 
 4852 // LoadStackGuard has so far only been implemented for MachO. A different code
 4853 // sequence is needed for other targets.
 4854 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
 4855  unsigned LoadImmOpc,
4856  unsigned LoadOpc) const {
4857  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4858  "ROPI/RWPI not currently supported with stack guard");
4859 
4860  MachineBasicBlock &MBB = *MI->getParent();
4861  DebugLoc DL = MI->getDebugLoc();
4862  Register Reg = MI->getOperand(0).getReg();
4863  const GlobalValue *GV =
4864  cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4865  MachineInstrBuilder MIB;
4866 
 4867  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
 4868  .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
 4869 
4870  if (Subtarget.isGVIndirectSymbol(GV)) {
4871  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4872  MIB.addReg(Reg, RegState::Kill).addImm(0);
 4873  auto Flags = MachineMemOperand::MOLoad |
 4874  MachineMemOperand::MODereferenceable |
 4875  MachineMemOperand::MOInvariant;
 4876  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
 4877  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4878  MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4879  }
4880 
4881  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4882  MIB.addReg(Reg, RegState::Kill)
4883  .addImm(0)
4884  .cloneMemRefs(*MI)
4885  .add(predOps(ARMCC::AL));
4886 }
4887 
4888 bool
4889 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4890  unsigned &AddSubOpc,
4891  bool &NegAcc, bool &HasLane) const {
4892  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4893  if (I == MLxEntryMap.end())
4894  return false;
4895 
4896  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4897  MulOpc = Entry.MulOpc;
4898  AddSubOpc = Entry.AddSubOpc;
4899  NegAcc = Entry.NegAcc;
4900  HasLane = Entry.HasLane;
4901  return true;
4902 }
4903 
4904 //===----------------------------------------------------------------------===//
4905 // Execution domains.
4906 //===----------------------------------------------------------------------===//
4907 //
4908 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4909 // and some can go down both. The vmov instructions go down the VFP pipeline,
4910 // but they can be changed to vorr equivalents that are executed by the NEON
4911 // pipeline.
4912 //
4913 // We use the following execution domain numbering:
4914 //
 4915 enum ARMExeDomain {
 4916  ExeGeneric = 0,
 4917  ExeVFP = 1,
 4918  ExeNEON = 2
 4919 };
4920 
4921 //
4922 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4923 //
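 // E.g. (illustrative): an unpredicated VMOVD is reported below as currently
 // executing in ExeVFP but convertible to ExeNEON; setExecutionDomain then
 // rewrites it into an equivalent VORRd with both source operands equal.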
 4924 std::pair<uint16_t, uint16_t>
 4925 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
 4926  // If we don't have access to NEON instructions then we won't be able
4927  // to swizzle anything to the NEON domain. Check to make sure.
4928  if (Subtarget.hasNEON()) {
4929  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4930  // if they are not predicated.
4931  if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4932  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4933 
4934  // CortexA9 is particularly picky about mixing the two and wants these
4935  // converted.
4936  if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4937  (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4938  MI.getOpcode() == ARM::VMOVS))
4939  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4940  }
4941  // No other instructions can be swizzled, so just determine their domain.
4942  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4943 
4944  if (Domain & ARMII::DomainNEON)
4945  return std::make_pair(ExeNEON, 0);
4946 
4947  // Certain instructions can go either way on Cortex-A8.
4948  // Treat them as NEON instructions.
4949  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4950  return std::make_pair(ExeNEON, 0);
4951 
4952  if (Domain & ARMII::DomainVFP)
4953  return std::make_pair(ExeVFP, 0);
4954 
4955  return std::make_pair(ExeGeneric, 0);
4956 }
4957 
 4958 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
 4959  unsigned SReg, unsigned &Lane) {
4960  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4961  Lane = 0;
4962 
4963  if (DReg != ARM::NoRegister)
4964  return DReg;
4965 
4966  Lane = 1;
4967  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4968 
4969  assert(DReg && "S-register with no D super-register?");
4970  return DReg;
4971 }
4972 
4973 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4974 /// set ImplicitSReg to a register number that must be marked as implicit-use or
4975 /// zero if no register needs to be defined as implicit-use.
4976 ///
4977 /// If the function cannot determine if an SPR should be marked implicit use or
4978 /// not, it returns false.
4979 ///
 4980 /// This function handles cases where an instruction is being modified from taking
 4981 /// an SPR to taking a DPR[Lane]. A use of the DPR is being added, which may conflict
4982 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4983 /// lane of the DPR).
4984 ///
4985 /// If the other SPR is defined, an implicit-use of it should be added. Else,
4986 /// (including the case where the DPR itself is defined), it should not.
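 /// E.g. (illustrative): when a use of s1 is rewritten as a use of d0 lane 1,
 /// and s0 (the other half of d0) was defined earlier and is still live, an
 /// implicit-use of s0 must be recorded so the earlier write is not considered
 /// dead.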
4987 ///
 4988 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
 4989  MachineInstr &MI, unsigned DReg,
4990  unsigned Lane, unsigned &ImplicitSReg) {
4991  // If the DPR is defined or used already, the other SPR lane will be chained
4992  // correctly, so there is nothing to be done.
4993  if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4994  ImplicitSReg = 0;
4995  return true;
4996  }
4997 
4998  // Otherwise we need to go searching to see if the SPR is set explicitly.
4999  ImplicitSReg = TRI->getSubReg(DReg,
5000  (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
 5001  MachineBasicBlock::LivenessQueryResult LQR =
 5002  MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
5003 
5004  if (LQR == MachineBasicBlock::LQR_Live)
5005  return true;
5006  else if (LQR == MachineBasicBlock::LQR_Unknown)
5007  return false;
5008 
5009  // If the register is known not to be live, there is no need to add an
5010  // implicit-use.
5011  ImplicitSReg = 0;
5012  return true;
5013 }
5014 
 5015 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
 5016  unsigned Domain) const {
5017  unsigned DstReg, SrcReg, DReg;
5018  unsigned Lane;
5019  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
 5020  const TargetRegisterInfo *TRI = &getRegisterInfo();
 5021  switch (MI.getOpcode()) {
5022  default:
5023  llvm_unreachable("cannot handle opcode!");
5024  break;
5025  case ARM::VMOVD:
5026  if (Domain != ExeNEON)
5027  break;
5028 
5029  // Zap the predicate operands.
5030  assert(!isPredicated(MI) && "Cannot predicate a VORRd");
5031 
5032  // Make sure we've got NEON instructions.
5033  assert(Subtarget.hasNEON() && "VORRd requires NEON");
5034 
5035  // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
5036  DstReg = MI.getOperand(0).getReg();
5037  SrcReg = MI.getOperand(1).getReg();
5038 
5039  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5040  MI.RemoveOperand(i - 1);
5041 
5042  // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
5043  MI.setDesc(get(ARM::VORRd));
5044  MIB.addReg(DstReg, RegState::Define)
5045  .addReg(SrcReg)
5046  .addReg(SrcReg)
5047  .add(predOps(ARMCC::AL));
5048  break;
5049  case ARM::VMOVRS:
5050  if (Domain != ExeNEON)
5051  break;
5052  assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5053 
5054  // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5055  DstReg = MI.getOperand(0).getReg();
5056  SrcReg = MI.getOperand(1).getReg();
5057 
5058  for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5059  MI.RemoveOperand(i - 1);
5060 
5061  DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5062 
5063  // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5064  // Note that DSrc has been widened and the other lane may be undef, which
5065  // contaminates the entire register.
5066  MI.setDesc(get(ARM::VGETLNi32));
5067  MIB.addReg(DstReg, RegState::Define)