1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the Base ARM implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMBaseInstrInfo.h"
14 #include "ARMBaseRegisterInfo.h"
15 #include "ARMConstantPoolValue.h"
16 #include "ARMFeatures.h"
17 #include "ARMHazardRecognizer.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
22 #include "MVETailPredUtils.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Triple.h"
46 #include "llvm/IR/Attributes.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DebugLoc.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/MC/MCAsmInfo.h"
52 #include "llvm/MC/MCInstrDesc.h"
55 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/Compiler.h"
58 #include "llvm/Support/Debug.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <iterator>
66 #include <new>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "arm-instrinfo"
73 
74 #define GET_INSTRINFO_CTOR_DTOR
75 #include "ARMGenInstrInfo.inc"
76 
77 static cl::opt<bool>
78 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
79  cl::desc("Enable ARM 2-addr to 3-addr conv"));
80 
81 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
82 struct ARM_MLxEntry {
83  uint16_t MLxOpc; // MLA / MLS opcode
84  uint16_t MulOpc; // Expanded multiplication opcode
85  uint16_t AddSubOpc; // Expanded add / sub opcode
86  bool NegAcc; // True if the acc is negated before the add / sub.
87  bool HasLane; // True if instruction has an extra "lane" operand.
88 };
89 
90 static const ARM_MLxEntry ARM_MLxTable[] = {
91  // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
92  // fp scalar ops
93  { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
94  { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
95  { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
96  { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
97  { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
98  { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
99  { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
100  { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
101 
102  // fp SIMD ops
103  { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
104  { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
105  { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
106  { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
107  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
108  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
109  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
110  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
111 };
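// Example use of an entry: a VFP scalar VMLAS that would stall behind a
// dependent instruction can be expanded into the independent pair
// VMULS + VADDS named by MulOpc/AddSubOpc. NegAcc marks variants such as
// VNMLAS whose accumulator is negated before the add/sub, and HasLane marks
// the by-lane SIMD forms that carry an extra lane operand.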
112 
113 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
114  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
115  Subtarget(STI) {
116  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
117  if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
118  llvm_unreachable("Duplicated entries?");
119  MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
120  MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
121  }
122 }
123 
124 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
125 // currently defaults to no prepass hazard recognizer.
126 ScheduleHazardRecognizer *
127 ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
128  const ScheduleDAG *DAG) const {
129  if (usePreRAHazardRecognizer()) {
130  const InstrItineraryData *II =
131  static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
132  return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
133  }
134  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
135 }
136 
137 // Called during:
138 // - pre-RA scheduling
139 // - post-RA scheduling when FeatureUseMISched is set
140 ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
141  const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
142  MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
143 
144  // We would like to restrict this hazard recognizer to only
145  // post-RA scheduling; we can tell that we're post-RA because we don't
146  // track VRegLiveness.
147  // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
148  // banks banked on bit 2. Assume that TCMs are in use.
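 // The 0x4 mask passed below selects address bit 2, so two accesses are
 // treated as conflicting when they would land in the same DTCM bank.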
149  if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
150  MHR->AddHazardRecognizer(
151  std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
152 
153  // Not inserting ARMHazardRecognizerFPMLx because that would change
154  // legacy behavior
155 
156  auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
157  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
158  return MHR;
159 }
160 
161 // Called during post-RA scheduling when FeatureUseMISched is not set
162 ScheduleHazardRecognizer *ARMBaseInstrInfo::
163 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
164  const ScheduleDAG *DAG) const {
165  MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
166 
167  if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
168  MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
169 
170  auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
171  if (BHR)
172  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
173  return MHR;
174 }
175 
176 MachineInstr *
177 ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
178  LiveIntervals *LIS) const {
179  // FIXME: Thumb2 support.
180 
181  if (!EnableARM3Addr)
182  return nullptr;
183 
184  MachineFunction &MF = *MI.getParent()->getParent();
185  uint64_t TSFlags = MI.getDesc().TSFlags;
186  bool isPre = false;
187  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
188  default: return nullptr;
189  case ARMII::IndexModePre:
190  isPre = true;
191  break;
192  case ARMII::IndexModePost:
193  break;
194  }
195 
196  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
197  // operation.
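 // For example, the pre-indexed "LDR r0, [r1, #4]!" becomes
 // "ADD r1, r1, #4" followed by "LDR r0, [r1]", while the post-indexed
 // "LDR r0, [r1], #4" becomes "LDR r0, [r1]" followed by "ADD r1, r1, #4".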
198  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
199  if (MemOpc == 0)
200  return nullptr;
201 
202  MachineInstr *UpdateMI = nullptr;
203  MachineInstr *MemMI = nullptr;
204  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
205  const MCInstrDesc &MCID = MI.getDesc();
206  unsigned NumOps = MCID.getNumOperands();
207  bool isLoad = !MI.mayStore();
208  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
209  const MachineOperand &Base = MI.getOperand(2);
210  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
211  Register WBReg = WB.getReg();
212  Register BaseReg = Base.getReg();
213  Register OffReg = Offset.getReg();
214  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
215  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
216  switch (AddrMode) {
217  default: llvm_unreachable("Unknown indexed op!");
218  case ARMII::AddrMode2: {
219  bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
220  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
221  if (OffReg == 0) {
222  if (ARM_AM::getSOImmVal(Amt) == -1)
223  // Can't encode it in a so_imm operand. This transformation will
224  // add more than 1 instruction. Abandon!
225  return nullptr;
226  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
227  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
228  .addReg(BaseReg)
229  .addImm(Amt)
230  .add(predOps(Pred))
231  .add(condCodeOp());
232  } else if (Amt != 0) {
233  ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
234  unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
235  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
236  get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
237  .addReg(BaseReg)
238  .addReg(OffReg)
239  .addReg(0)
240  .addImm(SOOpc)
241  .add(predOps(Pred))
242  .add(condCodeOp());
243  } else
244  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
245  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
246  .addReg(BaseReg)
247  .addReg(OffReg)
248  .add(predOps(Pred))
249  .add(condCodeOp());
250  break;
251  }
252  case ARMII::AddrMode3 : {
253  bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
254  unsigned Amt = ARM_AM::getAM3Offset(OffImm);
255  if (OffReg == 0)
256  // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
257  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
258  get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
259  .addReg(BaseReg)
260  .addImm(Amt)
261  .add(predOps(Pred))
262  .add(condCodeOp());
263  else
264  UpdateMI = BuildMI(MF, MI.getDebugLoc(),
265  get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
266  .addReg(BaseReg)
267  .addReg(OffReg)
268  .add(predOps(Pred))
269  .add(condCodeOp());
270  break;
271  }
272  }
273 
274  std::vector<MachineInstr*> NewMIs;
275  if (isPre) {
276  if (isLoad)
277  MemMI =
278  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
279  .addReg(WBReg)
280  .addImm(0)
281  .addImm(Pred);
282  else
283  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
284  .addReg(MI.getOperand(1).getReg())
285  .addReg(WBReg)
286  .addReg(0)
287  .addImm(0)
288  .addImm(Pred);
289  NewMIs.push_back(MemMI);
290  NewMIs.push_back(UpdateMI);
291  } else {
292  if (isLoad)
293  MemMI =
294  BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
295  .addReg(BaseReg)
296  .addImm(0)
297  .addImm(Pred);
298  else
299  MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
300  .addReg(MI.getOperand(1).getReg())
301  .addReg(BaseReg)
302  .addReg(0)
303  .addImm(0)
304  .addImm(Pred);
305  if (WB.isDead())
306  UpdateMI->getOperand(0).setIsDead();
307  NewMIs.push_back(UpdateMI);
308  NewMIs.push_back(MemMI);
309  }
310 
311  // Transfer LiveVariables states, kill / dead info.
312  if (LV) {
313  for (const MachineOperand &MO : MI.operands()) {
314  if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
315  Register Reg = MO.getReg();
316 
317  LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
318  if (MO.isDef()) {
319  MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
320  if (MO.isDead())
321  LV->addVirtualRegisterDead(Reg, *NewMI);
322  }
323  if (MO.isUse() && MO.isKill()) {
324  for (unsigned j = 0; j < 2; ++j) {
325  // Look at the two new MI's in reverse order.
326  MachineInstr *NewMI = NewMIs[j];
327  if (!NewMI->readsRegister(Reg))
328  continue;
329  LV->addVirtualRegisterKilled(Reg, *NewMI);
330  if (VI.removeKill(MI))
331  VI.Kills.push_back(NewMI);
332  break;
333  }
334  }
335  }
336  }
337  }
338 
339  MachineBasicBlock &MBB = *MI.getParent();
340  MBB.insert(MI, NewMIs[1]);
341  MBB.insert(MI, NewMIs[0]);
342  return NewMIs[0];
343 }
344 
345 // Branch analysis.
346 // Cond vector output format:
347 // 0 elements indicates an unconditional branch
348 // 2 elements indicates a conditional branch; the elements are
349 // the condition to check and the CPSR.
350 // 3 elements indicates a hardware loop end; the elements
351 // are the opcode, the operand value to test, and a dummy
352 // operand used to pad out to 3 operands.
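// For example: an unconditional B leaves Cond empty; "Bcc %bb, ne, $cpsr"
// yields Cond = { ARMCC::NE, $cpsr }; and a t2LoopEnd hardware-loop branch
// yields Cond = { t2LoopEnd opcode, loop-count operand, 0 }.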
353 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
354  MachineBasicBlock *&TBB,
355  MachineBasicBlock *&FBB,
356  SmallVectorImpl<MachineOperand> &Cond,
357  bool AllowModify) const {
358  TBB = nullptr;
359  FBB = nullptr;
360 
361  MachineBasicBlock::instr_iterator I = MBB.instr_end();
362  if (I == MBB.instr_begin())
363  return false; // Empty blocks are easy.
364  --I;
365 
366  // Walk backwards from the end of the basic block until the branch is
367  // analyzed or we give up.
368  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
369  // Flag to be raised on unanalyzeable instructions. This is useful in cases
370  // where we want to clean up on the end of the basic block before we bail
371  // out.
372  bool CantAnalyze = false;
373 
374  // Skip over DEBUG values, predicated nonterminators and speculation
375  // barrier terminators.
376  while (I->isDebugInstr() || !I->isTerminator() ||
377  isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
378  I->getOpcode() == ARM::t2DoLoopStartTP){
379  if (I == MBB.instr_begin())
380  return false;
381  --I;
382  }
383 
384  if (isIndirectBranchOpcode(I->getOpcode()) ||
385  isJumpTableBranchOpcode(I->getOpcode())) {
386  // Indirect branches and jump tables can't be analyzed, but we still want
387  // to clean up any instructions at the tail of the basic block.
388  CantAnalyze = true;
389  } else if (isUncondBranchOpcode(I->getOpcode())) {
390  TBB = I->getOperand(0).getMBB();
391  } else if (isCondBranchOpcode(I->getOpcode())) {
392  // Bail out if we encounter multiple conditional branches.
393  if (!Cond.empty())
394  return true;
395 
396  assert(!FBB && "FBB should have been null.");
397  FBB = TBB;
398  TBB = I->getOperand(0).getMBB();
399  Cond.push_back(I->getOperand(1));
400  Cond.push_back(I->getOperand(2));
401  } else if (I->isReturn()) {
402  // Returns can't be analyzed, but we should run cleanup.
403  CantAnalyze = true;
404  } else if (I->getOpcode() == ARM::t2LoopEnd &&
405  MBB.getParent()
406  ->getSubtarget<ARMSubtarget>()
407  .enableMachinePipeliner()) {
408  if (!Cond.empty())
409  return true;
410  FBB = TBB;
411  TBB = I->getOperand(1).getMBB();
412  Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
413  Cond.push_back(I->getOperand(0));
414  Cond.push_back(MachineOperand::CreateImm(0));
415  } else {
416  // We encountered other unrecognized terminator. Bail out immediately.
417  return true;
418  }
419 
420  // Cleanup code - to be run for unpredicated unconditional branches and
421  // returns.
422  if (!isPredicated(*I) &&
423  (isUncondBranchOpcode(I->getOpcode()) ||
424  isIndirectBranchOpcode(I->getOpcode()) ||
425  isJumpTableBranchOpcode(I->getOpcode()) ||
426  I->isReturn())) {
427  // Forget any previous conditional branch information - it no longer applies.
428  Cond.clear();
429  FBB = nullptr;
430 
431  // If we can modify the function, delete everything below this
432  // unconditional branch.
433  if (AllowModify) {
434  MachineBasicBlock::iterator DI = std::next(I);
435  while (DI != MBB.instr_end()) {
436  MachineInstr &InstToDelete = *DI;
437  ++DI;
438  // Speculation barriers must not be deleted.
439  if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
440  continue;
441  InstToDelete.eraseFromParent();
442  }
443  }
444  }
445 
446  if (CantAnalyze) {
447  // We may not be able to analyze the block, but we could still have
448  // an unconditional branch as the last instruction in the block, which
449  // just branches to layout successor. If this is the case, then just
450  // remove it if we're allowed to make modifications.
451  if (AllowModify && !isPredicated(MBB.back()) &&
452  isUncondBranchOpcode(MBB.back().getOpcode()) &&
453  TBB && MBB.isLayoutSuccessor(TBB))
454  removeBranch(MBB);
455  return true;
456  }
457 
458  if (I == MBB.instr_begin())
459  return false;
460 
461  --I;
462  }
463 
464  // We made it past the terminators without bailing out - we must have
465  // analyzed this branch successfully.
466  return false;
467 }
468 
469 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
470  int *BytesRemoved) const {
471  assert(!BytesRemoved && "code size not handled");
472 
473  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
474  if (I == MBB.end())
475  return 0;
476 
477  if (!isUncondBranchOpcode(I->getOpcode()) &&
478  !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
479  return 0;
480 
481  // Remove the branch.
482  I->eraseFromParent();
483 
484  I = MBB.end();
485 
486  if (I == MBB.begin()) return 1;
487  --I;
488  if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
489  return 1;
490 
491  // Remove the branch.
492  I->eraseFromParent();
493  return 2;
494 }
495 
496 unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
497  MachineBasicBlock *TBB,
498  MachineBasicBlock *FBB,
499  ArrayRef<MachineOperand> Cond,
500  const DebugLoc &DL,
501  int *BytesAdded) const {
502  assert(!BytesAdded && "code size not handled");
503  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
504  int BOpc = !AFI->isThumbFunction()
505  ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
506  int BccOpc = !AFI->isThumbFunction()
507  ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
508  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
509 
510  // Shouldn't be a fall through.
511  assert(TBB && "insertBranch must not be told to insert a fallthrough");
512  assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
513  "ARM branch conditions have two or three components!");
514 
515  // For conditional branches, we use addOperand to preserve CPSR flags.
516 
517  if (!FBB) {
518  if (Cond.empty()) { // Unconditional branch?
519  if (isThumb)
520  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
521  else
522  BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
523  } else if (Cond.size() == 2) {
524  BuildMI(&MBB, DL, get(BccOpc))
525  .addMBB(TBB)
526  .addImm(Cond[0].getImm())
527  .add(Cond[1]);
528  } else
529  BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
530  return 1;
531  }
532 
533  // Two-way conditional branch.
534  if (Cond.size() == 2)
535  BuildMI(&MBB, DL, get(BccOpc))
536  .addMBB(TBB)
537  .addImm(Cond[0].getImm())
538  .add(Cond[1]);
539  else if (Cond.size() == 3)
540  BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
541  if (isThumb)
542  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
543  else
544  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
545  return 2;
546 }
547 
548 bool ARMBaseInstrInfo::
549 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
550  if (Cond.size() == 2) {
551  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
552  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
553  return false;
554  }
555  return true;
556 }
557 
558 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
559  if (MI.isBundle()) {
560  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
561  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
562  while (++I != E && I->isInsideBundle()) {
563  int PIdx = I->findFirstPredOperandIdx();
564  if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
565  return true;
566  }
567  return false;
568  }
569 
570  int PIdx = MI.findFirstPredOperandIdx();
571  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
572 }
573 
574 std::string ARMBaseInstrInfo::createMIROperandComment(
575  const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
576  const TargetRegisterInfo *TRI) const {
577 
578  // First, let's see if there is a generic comment for this operand
579  std::string GenericComment =
580  TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
581  if (!GenericComment.empty())
582  return GenericComment;
583 
584  // If not, check if we have an immediate operand.
585  if (!Op.isImm())
586  return std::string();
587 
588  // And print its corresponding condition code if the immediate is a
589  // predicate.
590  int FirstPredOp = MI.findFirstPredOperandIdx();
591  if (FirstPredOp != (int) OpIdx)
592  return std::string();
593 
594  std::string CC = "CC::";
595  CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
596  return CC;
597 }
598 
599 bool ARMBaseInstrInfo::PredicateInstruction(
600  MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
601  unsigned Opc = MI.getOpcode();
602  if (isUncondBranchOpcode(Opc)) {
603  MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
604  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
605  .addImm(Pred[0].getImm())
606  .addReg(Pred[1].getReg());
607  return true;
608  }
609 
610  int PIdx = MI.findFirstPredOperandIdx();
611  if (PIdx != -1) {
612  MachineOperand &PMO = MI.getOperand(PIdx);
613  PMO.setImm(Pred[0].getImm());
614  MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
615 
616  // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
617  // IT block. This affects how they are printed.
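 // For example, tADDrr normally prints as the flag-setting "adds", but inside
 // an IT block the same encoding does not update CPSR and prints as "add", so
 // the optional CPSR def operand is cleared below.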
618  const MCInstrDesc &MCID = MI.getDesc();
619  if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
620  assert(MCID.OpInfo[1].isOptionalDef() && "CPSR def isn't expected operand");
621  assert((MI.getOperand(1).isDead() ||
622  MI.getOperand(1).getReg() != ARM::CPSR) &&
623  "if conversion tried to stop defining used CPSR");
624  MI.getOperand(1).setReg(ARM::NoRegister);
625  }
626 
627  return true;
628  }
629  return false;
630 }
631 
632 bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
633  ArrayRef<MachineOperand> Pred2) const {
634  if (Pred1.size() > 2 || Pred2.size() > 2)
635  return false;
636 
637  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
638  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
639  if (CC1 == CC2)
640  return true;
641 
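 // Otherwise a predicate subsumes a strictly stronger one, e.g. HS
 // (unsigned higher or same) subsumes HI, and GE subsumes GT.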
642  switch (CC1) {
643  default:
644  return false;
645  case ARMCC::AL:
646  return true;
647  case ARMCC::HS:
648  return CC2 == ARMCC::HI;
649  case ARMCC::LS:
650  return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
651  case ARMCC::GE:
652  return CC2 == ARMCC::GT;
653  case ARMCC::LE:
654  return CC2 == ARMCC::LT;
655  }
656 }
657 
658 bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
659  std::vector<MachineOperand> &Pred,
660  bool SkipDead) const {
661  bool Found = false;
662  for (const MachineOperand &MO : MI.operands()) {
663  bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
664  bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
665  if (ClobbersCPSR || IsCPSR) {
666 
667  // Filter out T1 instructions that have a dead CPSR,
668  // allowing IT blocks to be generated containing T1 instructions
669  const MCInstrDesc &MCID = MI.getDesc();
670  if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
671  SkipDead)
672  continue;
673 
674  Pred.push_back(MO);
675  Found = true;
676  }
677  }
678 
679  return Found;
680 }
681 
682 static bool isCPSRDefined(const MachineInstr &MI) {
683  for (const auto &MO : MI.operands())
684  if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
685  return true;
686  return false;
687 }
688 
689 static bool isEligibleForITBlock(const MachineInstr *MI) {
690  switch (MI->getOpcode()) {
691  default: return true;
692  case ARM::tADC: // ADC (register) T1
693  case ARM::tADDi3: // ADD (immediate) T1
694  case ARM::tADDi8: // ADD (immediate) T2
695  case ARM::tADDrr: // ADD (register) T1
696  case ARM::tAND: // AND (register) T1
697  case ARM::tASRri: // ASR (immediate) T1
698  case ARM::tASRrr: // ASR (register) T1
699  case ARM::tBIC: // BIC (register) T1
700  case ARM::tEOR: // EOR (register) T1
701  case ARM::tLSLri: // LSL (immediate) T1
702  case ARM::tLSLrr: // LSL (register) T1
703  case ARM::tLSRri: // LSR (immediate) T1
704  case ARM::tLSRrr: // LSR (register) T1
705  case ARM::tMUL: // MUL T1
706  case ARM::tMVN: // MVN (register) T1
707  case ARM::tORR: // ORR (register) T1
708  case ARM::tROR: // ROR (register) T1
709  case ARM::tRSB: // RSB (immediate) T1
710  case ARM::tSBC: // SBC (register) T1
711  case ARM::tSUBi3: // SUB (immediate) T1
712  case ARM::tSUBi8: // SUB (immediate) T2
713  case ARM::tSUBrr: // SUB (register) T1
713  case ARM::tTST: // TST (register) T1
714  return !isCPSRDefined(*MI);
715  }
716 }
717 
718 /// isPredicable - Return true if the specified instruction can be predicated.
719 /// By default, this returns true for every instruction with a
720 /// PredicateOperand.
721 bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
722  if (!MI.isPredicable())
723  return false;
724 
725  if (MI.isBundle())
726  return false;
727 
728  if (!isEligibleForITBlock(&MI))
729  return false;
730 
731  const MachineFunction *MF = MI.getParent()->getParent();
732  const ARMFunctionInfo *AFI =
733  MF->getInfo<ARMFunctionInfo>();
734 
735  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
736  // In their ARM encoding, they can't be encoded in a conditional form.
737  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
738  return false;
739 
740  // Make indirect control flow changes unpredicable when SLS mitigation is
741  // enabled.
742  const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
743  if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
744  return false;
745  if (ST.hardenSlsBlr() && isIndirectCall(MI))
746  return false;
747 
748  if (AFI->isThumb2Function()) {
749  if (getSubtarget().restrictIT())
750  return isV8EligibleForIT(&MI);
751  }
752 
753  return true;
754 }
755 
756 namespace llvm {
757 
758 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
759  for (const MachineOperand &MO : MI->operands()) {
760  if (!MO.isReg() || MO.isUndef() || MO.isUse())
761  continue;
762  if (MO.getReg() != ARM::CPSR)
763  continue;
764  if (!MO.isDead())
765  return false;
766  }
767  // all definitions of CPSR are dead
768  return true;
769 }
770 
771 } // end namespace llvm
772 
773 /// GetInstSize - Return the size of the specified MachineInstr.
774 ///
775 unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
776  const MachineBasicBlock &MBB = *MI.getParent();
777  const MachineFunction *MF = MBB.getParent();
778  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
779 
780  const MCInstrDesc &MCID = MI.getDesc();
781 
782  switch (MI.getOpcode()) {
783  default:
784  // Return the size specified in .td file. If there's none, return 0, as we
785  // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
786  // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
787  // contrast to AArch64 instructions which have a default size of 4 bytes for
788  // example.
789  return MCID.getSize();
790  case TargetOpcode::BUNDLE:
791  return getInstBundleLength(MI);
792  case ARM::CONSTPOOL_ENTRY:
793  case ARM::JUMPTABLE_INSTS:
794  case ARM::JUMPTABLE_ADDRS:
795  case ARM::JUMPTABLE_TBB:
796  case ARM::JUMPTABLE_TBH:
797  // If this machine instr is a constant pool entry, its size is recorded as
798  // operand #2.
799  return MI.getOperand(2).getImm();
800  case ARM::SPACE:
801  return MI.getOperand(1).getImm();
802  case ARM::INLINEASM:
803  case ARM::INLINEASM_BR: {
804  // If this machine instr is an inline asm, measure it.
805  unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
806  if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
807  Size = alignTo(Size, 4);
808  return Size;
809  }
810  }
811 }
812 
813 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
814  unsigned Size = 0;
815  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
816  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
817  while (++I != E && I->isInsideBundle()) {
818  assert(!I->isBundle() && "No nested bundle!");
819  Size += getInstSizeInBytes(*I);
820  }
821  return Size;
822 }
823 
824 void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
825  MachineBasicBlock::iterator I,
826  unsigned DestReg, bool KillSrc,
827  const ARMSubtarget &Subtarget) const {
828  unsigned Opc = Subtarget.isThumb()
829  ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
830  : ARM::MRS;
831 
832  MachineInstrBuilder MIB =
833  BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
834 
835  // There is only 1 A/R class MRS instruction, and it always refers to
836  // APSR. However, there are lots of other possibilities on M-class cores.
837  if (Subtarget.isMClass())
838  MIB.addImm(0x800);
839 
840  MIB.add(predOps(ARMCC::AL))
841  .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
842 }
843 
844 void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
845  MachineBasicBlock::iterator I,
846  unsigned SrcReg, bool KillSrc,
847  const ARMSubtarget &Subtarget) const {
848  unsigned Opc = Subtarget.isThumb()
849  ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
850  : ARM::MSR;
851 
852  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
853 
854  if (Subtarget.isMClass())
855  MIB.addImm(0x800);
856  else
857  MIB.addImm(8);
858 
859  MIB.addReg(SrcReg, getKillRegState(KillSrc))
860  .add(predOps(ARMCC::AL))
861  .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
862 }
863 
864 void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
865  MIB.addImm(ARMVCC::None);
866  MIB.addReg(0);
867  MIB.addReg(0); // tp_reg
868 }
869 
870 void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
871  Register DestReg) {
872  addUnpredicatedMveVpredNOp(MIB);
873  MIB.addReg(DestReg, RegState::Undef);
874 }
875 
876 void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
877  MIB.addImm(Cond);
878  MIB.addReg(ARM::VPR, RegState::Implicit);
879  MIB.addReg(0); // tp_reg
880 }
881 
882 void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
883  unsigned Cond, unsigned Inactive) {
884  addPredicatedMveVpredNOp(MIB, Cond);
885  MIB.addReg(Inactive);
886 }
887 
888 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
889  MachineBasicBlock::iterator I,
890  const DebugLoc &DL, MCRegister DestReg,
891  MCRegister SrcReg, bool KillSrc) const {
892  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
893  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
894 
895  if (GPRDest && GPRSrc) {
896  BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
897  .addReg(SrcReg, getKillRegState(KillSrc))
898  .add(predOps(ARMCC::AL))
899  .add(condCodeOp());
900  return;
901  }
902 
903  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
904  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
905 
906  unsigned Opc = 0;
907  if (SPRDest && SPRSrc)
908  Opc = ARM::VMOVS;
909  else if (GPRDest && SPRSrc)
910  Opc = ARM::VMOVRS;
911  else if (SPRDest && GPRSrc)
912  Opc = ARM::VMOVSR;
913  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
914  Opc = ARM::VMOVD;
915  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
916  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
917 
918  if (Opc) {
919  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
920  MIB.addReg(SrcReg, getKillRegState(KillSrc));
921  if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
922  MIB.addReg(SrcReg, getKillRegState(KillSrc));
923  if (Opc == ARM::MVE_VORR)
924  addUnpredicatedMveVpredROp(MIB, DestReg);
925  else if (Opc != ARM::MQPRCopy)
926  MIB.add(predOps(ARMCC::AL));
927  return;
928  }
929 
930  // Handle register classes that require multiple instructions.
931  unsigned BeginIdx = 0;
932  unsigned SubRegs = 0;
933  int Spacing = 1;
934 
935  // Use VORRq when possible.
936  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
937  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
938  BeginIdx = ARM::qsub_0;
939  SubRegs = 2;
940  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
941  Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
942  BeginIdx = ARM::qsub_0;
943  SubRegs = 4;
944  // Fall back to VMOVD.
945  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
946  Opc = ARM::VMOVD;
947  BeginIdx = ARM::dsub_0;
948  SubRegs = 2;
949  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
950  Opc = ARM::VMOVD;
951  BeginIdx = ARM::dsub_0;
952  SubRegs = 3;
953  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
954  Opc = ARM::VMOVD;
955  BeginIdx = ARM::dsub_0;
956  SubRegs = 4;
957  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
958  Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
959  BeginIdx = ARM::gsub_0;
960  SubRegs = 2;
961  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
962  Opc = ARM::VMOVD;
963  BeginIdx = ARM::dsub_0;
964  SubRegs = 2;
965  Spacing = 2;
966  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
967  Opc = ARM::VMOVD;
968  BeginIdx = ARM::dsub_0;
969  SubRegs = 3;
970  Spacing = 2;
971  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
972  Opc = ARM::VMOVD;
973  BeginIdx = ARM::dsub_0;
974  SubRegs = 4;
975  Spacing = 2;
976  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
977  !Subtarget.hasFP64()) {
978  Opc = ARM::VMOVS;
979  BeginIdx = ARM::ssub_0;
980  SubRegs = 2;
981  } else if (SrcReg == ARM::CPSR) {
982  copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
983  return;
984  } else if (DestReg == ARM::CPSR) {
985  copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
986  return;
987  } else if (DestReg == ARM::VPR) {
988  assert(ARM::GPRRegClass.contains(SrcReg));
989  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
990  .addReg(SrcReg, getKillRegState(KillSrc))
991  .add(predOps(ARMCC::AL));
992  return;
993  } else if (SrcReg == ARM::VPR) {
994  assert(ARM::GPRRegClass.contains(DestReg));
995  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
996  .addReg(SrcReg, getKillRegState(KillSrc))
997  .add(predOps(ARMCC::AL));
998  return;
999  } else if (DestReg == ARM::FPSCR_NZCV) {
1000  assert(ARM::GPRRegClass.contains(SrcReg));
1001  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
1002  .addReg(SrcReg, getKillRegState(KillSrc))
1003  .add(predOps(ARMCC::AL));
1004  return;
1005  } else if (SrcReg == ARM::FPSCR_NZCV) {
1006  assert(ARM::GPRRegClass.contains(DestReg));
1007  BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
1008  .addReg(SrcReg, getKillRegState(KillSrc))
1009  .add(predOps(ARMCC::AL));
1010  return;
1011  }
1012 
1013  assert(Opc && "Impossible reg-to-reg copy");
1014 
1015  const TargetRegisterInfo *TRI = &getRegisterInfo();
1016  MachineInstrBuilder Mov;
1017 
1018  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
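 // For example, copying the quad d1_d2_d3_d4 into d2_d3_d4_d5 in forward
 // order would write d2 before it has been read as a source; reversing the
 // order (d5<-d4, d4<-d3, ...) keeps every source read ahead of its write.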
1019  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
1020  BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
1021  Spacing = -Spacing;
1022  }
1023 #ifndef NDEBUG
1024  SmallSet<unsigned, 4> DstRegs;
1025 #endif
1026  for (unsigned i = 0; i != SubRegs; ++i) {
1027  Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
1028  Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
1029  assert(Dst && Src && "Bad sub-register");
1030 #ifndef NDEBUG
1031  assert(!DstRegs.count(Src) && "destructive vector copy");
1032  DstRegs.insert(Dst);
1033 #endif
1034  Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
1035  // VORR (NEON or MVE) takes two source operands.
1036  if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
1037  Mov.addReg(Src);
1038  }
1039  // MVE VORR takes predicate operands in place of an ordinary condition.
1040  if (Opc == ARM::MVE_VORR)
1041  addUnpredicatedMveVpredROp(Mov, Dst);
1042  else
1043  Mov = Mov.add(predOps(ARMCC::AL));
1044  // MOVr can set CC.
1045  if (Opc == ARM::MOVr)
1046  Mov = Mov.add(condCodeOp());
1047  }
1048  // Add implicit super-register defs and kills to the last instruction.
1049  Mov->addRegisterDefined(DestReg, TRI);
1050  if (KillSrc)
1051  Mov->addRegisterKilled(SrcReg, TRI);
1052 }
1053 
1054 Optional<DestSourcePair>
1055 ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1056  // VMOVRRD is also a copy instruction but it requires
1057  // a special way of handling. It is a more complex copy version
1058  // and so we do not consider it here. For recognition
1059  // of such instructions the isExtractSubregLike MI interface function
1060  // could be used.
1061  // VORRq is considered as a move only if two inputs are
1062  // the same register.
1063  if (!MI.isMoveReg() ||
1064  (MI.getOpcode() == ARM::VORRq &&
1065  MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
1066  return None;
1067  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1068 }
1069 
1070 Optional<ParamLoadedValue>
1071 ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
1072  Register Reg) const {
1073  if (auto DstSrcPair = isCopyInstrImpl(MI)) {
1074  Register DstReg = DstSrcPair->Destination->getReg();
1075 
1076  // TODO: We don't handle cases where the forwarding reg is narrower/wider
1077  // than the copy registers. Consider for example:
1078  //
1079  // s16 = VMOVS s0
1080  // s17 = VMOVS s1
1081  // call @callee(d0)
1082  //
1083  // We'd like to describe the call site value of d0 as d8, but this requires
1084  // gathering and merging the descriptions for the two VMOVS instructions.
1085  //
1086  // We also don't handle the reverse situation, where the forwarding reg is
1087  // narrower than the copy destination:
1088  //
1089  // d8 = VMOVD d0
1090  // call @callee(s1)
1091  //
1092  // We need to produce a fragment description (the call site value of s1 is
1093  // /not/ just d8).
1094  if (DstReg != Reg)
1095  return None;
1096  }
1097  return TargetInstrInfo::describeLoadedValue(MI, Reg);
1098 }
1099 
1100 const MachineInstrBuilder &
1101 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
1102  unsigned SubIdx, unsigned State,
1103  const TargetRegisterInfo *TRI) const {
1104  if (!SubIdx)
1105  return MIB.addReg(Reg, State);
1106 
1107  if (Register::isPhysicalRegister(Reg))
1108  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1109  return MIB.addReg(Reg, State, SubIdx);
1110 }
1111 
1112 void ARMBaseInstrInfo::
1113 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1114  Register SrcReg, bool isKill, int FI,
1115  const TargetRegisterClass *RC,
1116  const TargetRegisterInfo *TRI) const {
1117  MachineFunction &MF = *MBB.getParent();
1118  MachineFrameInfo &MFI = MF.getFrameInfo();
1119  Align Alignment = MFI.getObjectAlign(FI);
1120 
1121  MachineMemOperand *MMO = MF.getMachineMemOperand(
1122  MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
1123  MFI.getObjectSize(FI), Alignment);
1124 
1125  switch (TRI->getSpillSize(*RC)) {
1126  case 2:
1127  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1128  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
1129  .addReg(SrcReg, getKillRegState(isKill))
1130  .addFrameIndex(FI)
1131  .addImm(0)
1132  .addMemOperand(MMO)
1133  .add(predOps(ARMCC::AL));
1134  } else
1135  llvm_unreachable("Unknown reg class!");
1136  break;
1137  case 4:
1138  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1139  BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1140  .addReg(SrcReg, getKillRegState(isKill))
1141  .addFrameIndex(FI)
1142  .addImm(0)
1143  .addMemOperand(MMO)
1144  .add(predOps(ARMCC::AL));
1145  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1146  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1147  .addReg(SrcReg, getKillRegState(isKill))
1148  .addFrameIndex(FI)
1149  .addImm(0)
1150  .addMemOperand(MMO)
1151  .add(predOps(ARMCC::AL));
1152  } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1153  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
1154  .addReg(SrcReg, getKillRegState(isKill))
1155  .addFrameIndex(FI)
1156  .addImm(0)
1157  .addMemOperand(MMO)
1158  .add(predOps(ARMCC::AL));
1159  } else
1160  llvm_unreachable("Unknown reg class!");
1161  break;
1162  case 8:
1163  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1164  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1165  .addReg(SrcReg, getKillRegState(isKill))
1166  .addFrameIndex(FI)
1167  .addImm(0)
1168  .addMemOperand(MMO)
1169  .add(predOps(ARMCC::AL));
1170  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1171  if (Subtarget.hasV5TEOps()) {
1172  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1173  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1174  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1175  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1176  .add(predOps(ARMCC::AL));
1177  } else {
1178  // Fallback to STM instruction, which has existed since the dawn of
1179  // time.
1180  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1181  .addFrameIndex(FI)
1182  .addMemOperand(MMO)
1183  .add(predOps(ARMCC::AL));
1184  AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1185  AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1186  }
1187  } else
1188  llvm_unreachable("Unknown reg class!");
1189  break;
1190  case 16:
1191  if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1192  // Use aligned spills if the stack can be realigned.
1193  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1194  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1195  .addFrameIndex(FI)
1196  .addImm(16)
1197  .addReg(SrcReg, getKillRegState(isKill))
1198  .addMemOperand(MMO)
1199  .add(predOps(ARMCC::AL));
1200  } else {
1201  BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1202  .addReg(SrcReg, getKillRegState(isKill))
1203  .addFrameIndex(FI)
1204  .addMemOperand(MMO)
1205  .add(predOps(ARMCC::AL));
1206  }
1207  } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1208  Subtarget.hasMVEIntegerOps()) {
1209  auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1210  MIB.addReg(SrcReg, getKillRegState(isKill))
1211  .addFrameIndex(FI)
1212  .addImm(0)
1213  .addMemOperand(MMO);
1214  addUnpredicatedMveVpredNOp(MIB);
1215  } else
1216  llvm_unreachable("Unknown reg class!");
1217  break;
1218  case 24:
1219  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1220  // Use aligned spills if the stack can be realigned.
1221  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1222  Subtarget.hasNEON()) {
1223  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1224  .addFrameIndex(FI)
1225  .addImm(16)
1226  .addReg(SrcReg, getKillRegState(isKill))
1227  .addMemOperand(MMO)
1228  .add(predOps(ARMCC::AL));
1229  } else {
1230  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1231  get(ARM::VSTMDIA))
1232  .addFrameIndex(FI)
1233  .add(predOps(ARMCC::AL))
1234  .addMemOperand(MMO);
1235  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1236  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1237  AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1238  }
1239  } else
1240  llvm_unreachable("Unknown reg class!");
1241  break;
1242  case 32:
1243  if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1244  ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1245  ARM::DQuadRegClass.hasSubClassEq(RC)) {
1246  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1247  Subtarget.hasNEON()) {
1248  // FIXME: It's possible to only store part of the QQ register if the
1249  // spilled def has a sub-register index.
1250  BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1251  .addFrameIndex(FI)
1252  .addImm(16)
1253  .addReg(SrcReg, getKillRegState(isKill))
1254  .addMemOperand(MMO)
1255  .add(predOps(ARMCC::AL));
1256  } else if (Subtarget.hasMVEIntegerOps()) {
1257  BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1258  .addReg(SrcReg, getKillRegState(isKill))
1259  .addFrameIndex(FI)
1260  .addMemOperand(MMO);
1261  } else {
1262  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1263  get(ARM::VSTMDIA))
1264  .addFrameIndex(FI)
1265  .add(predOps(ARMCC::AL))
1266  .addMemOperand(MMO);
1267  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1268  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1269  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1270  AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1271  }
1272  } else
1273  llvm_unreachable("Unknown reg class!");
1274  break;
1275  case 64:
1276  if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1277  Subtarget.hasMVEIntegerOps()) {
1278  BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1279  .addReg(SrcReg, getKillRegState(isKill))
1280  .addFrameIndex(FI)
1281  .addMemOperand(MMO);
1282  } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1283  MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1284  .addFrameIndex(FI)
1285  .add(predOps(ARMCC::AL))
1286  .addMemOperand(MMO);
1287  MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1288  MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1289  MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1290  MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1291  MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1292  MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1293  MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1294  AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1295  } else
1296  llvm_unreachable("Unknown reg class!");
1297  break;
1298  default:
1299  llvm_unreachable("Unknown reg class!");
1300  }
1301 }
1302 
1303 unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1304  int &FrameIndex) const {
1305  switch (MI.getOpcode()) {
1306  default: break;
1307  case ARM::STRrs:
1308  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1309  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1310  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1311  MI.getOperand(3).getImm() == 0) {
1312  FrameIndex = MI.getOperand(1).getIndex();
1313  return MI.getOperand(0).getReg();
1314  }
1315  break;
1316  case ARM::STRi12:
1317  case ARM::t2STRi12:
1318  case ARM::tSTRspi:
1319  case ARM::VSTRD:
1320  case ARM::VSTRS:
1321  case ARM::VSTR_P0_off:
1322  case ARM::MVE_VSTRWU32:
1323  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1324  MI.getOperand(2).getImm() == 0) {
1325  FrameIndex = MI.getOperand(1).getIndex();
1326  return MI.getOperand(0).getReg();
1327  }
1328  break;
1329  case ARM::VST1q64:
1330  case ARM::VST1d64TPseudo:
1331  case ARM::VST1d64QPseudo:
1332  if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1333  FrameIndex = MI.getOperand(0).getIndex();
1334  return MI.getOperand(2).getReg();
1335  }
1336  break;
1337  case ARM::VSTMQIA:
1338  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1339  FrameIndex = MI.getOperand(1).getIndex();
1340  return MI.getOperand(0).getReg();
1341  }
1342  break;
1343  case ARM::MQQPRStore:
1344  case ARM::MQQQQPRStore:
1345  if (MI.getOperand(1).isFI()) {
1346  FrameIndex = MI.getOperand(1).getIndex();
1347  return MI.getOperand(0).getReg();
1348  }
1349  break;
1350  }
1351 
1352  return 0;
1353 }
1354 
1355 bool ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1356  int &FrameIndex) const {
1357  SmallVector<const MachineMemOperand *, 1> Accesses;
1358  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1359  Accesses.size() == 1) {
1360  FrameIndex =
1361  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1362  ->getFrameIndex();
1363  return true;
1364  }
1365  return false;
1366 }
1367 
1368 void ARMBaseInstrInfo::
1369 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1370  Register DestReg, int FI,
1371  const TargetRegisterClass *RC,
1372  const TargetRegisterInfo *TRI) const {
1373  DebugLoc DL;
1374  if (I != MBB.end()) DL = I->getDebugLoc();
1375  MachineFunction &MF = *MBB.getParent();
1376  MachineFrameInfo &MFI = MF.getFrameInfo();
1377  const Align Alignment = MFI.getObjectAlign(FI);
1378  MachineMemOperand *MMO = MF.getMachineMemOperand(
1379  MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1380  MFI.getObjectSize(FI), Alignment);
1381 
1382  switch (TRI->getSpillSize(*RC)) {
1383  case 2:
1384  if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1385  BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1386  .addFrameIndex(FI)
1387  .addImm(0)
1388  .addMemOperand(MMO)
1389  .add(predOps(ARMCC::AL));
1390  } else
1391  llvm_unreachable("Unknown reg class!");
1392  break;
1393  case 4:
1394  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1395  BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1396  .addFrameIndex(FI)
1397  .addImm(0)
1398  .addMemOperand(MMO)
1399  .add(predOps(ARMCC::AL));
1400  } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1401  BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1402  .addFrameIndex(FI)
1403  .addImm(0)
1404  .addMemOperand(MMO)
1405  .add(predOps(ARMCC::AL));
1406  } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1407  BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1408  .addFrameIndex(FI)
1409  .addImm(0)
1410  .addMemOperand(MMO)
1411  .add(predOps(ARMCC::AL));
1412  } else
1413  llvm_unreachable("Unknown reg class!");
1414  break;
1415  case 8:
1416  if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1417  BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1418  .addFrameIndex(FI)
1419  .addImm(0)
1420  .addMemOperand(MMO)
1421  .add(predOps(ARMCC::AL));
1422  } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1423  MachineInstrBuilder MIB;
1424 
1425  if (Subtarget.hasV5TEOps()) {
1426  MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1427  AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1428  AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1429  MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1430  .add(predOps(ARMCC::AL));
1431  } else {
1432  // Fallback to LDM instruction, which has existed since the dawn of
1433  // time.
1434  MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1435  .addFrameIndex(FI)
1436  .addMemOperand(MMO)
1437  .add(predOps(ARMCC::AL));
1438  MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1439  MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1440  }
1441 
1442  if (Register::isPhysicalRegister(DestReg))
1443  MIB.addReg(DestReg, RegState::ImplicitDefine);
1444  } else
1445  llvm_unreachable("Unknown reg class!");
1446  break;
1447  case 16:
1448  if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1449  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1450  BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1451  .addFrameIndex(FI)
1452  .addImm(16)
1453  .addMemOperand(MMO)
1454  .add(predOps(ARMCC::AL));
1455  } else {
1456  BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1457  .addFrameIndex(FI)
1458  .addMemOperand(MMO)
1459  .add(predOps(ARMCC::AL));
1460  }
1461  } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1462  Subtarget.hasMVEIntegerOps()) {
1463  auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1464  MIB.addFrameIndex(FI)
1465  .addImm(0)
1466  .addMemOperand(MMO);
1467  addUnpredicatedMveVpredNOp(MIB);
1468  } else
1469  llvm_unreachable("Unknown reg class!");
1470  break;
1471  case 24:
1472  if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1473  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1474  Subtarget.hasNEON()) {
1475  BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1476  .addFrameIndex(FI)
1477  .addImm(16)
1478  .addMemOperand(MMO)
1479  .add(predOps(ARMCC::AL));
1480  } else {
1481  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1482  .addFrameIndex(FI)
1483  .addMemOperand(MMO)
1484  .add(predOps(ARMCC::AL));
1485  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1486  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1487  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1488  if (Register::isPhysicalRegister(DestReg))
1489  MIB.addReg(DestReg, RegState::ImplicitDefine);
1490  }
1491  } else
1492  llvm_unreachable("Unknown reg class!");
1493  break;
1494  case 32:
1495  if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1496  ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1497  ARM::DQuadRegClass.hasSubClassEq(RC)) {
1498  if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1499  Subtarget.hasNEON()) {
1500  BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1501  .addFrameIndex(FI)
1502  .addImm(16)
1503  .addMemOperand(MMO)
1504  .add(predOps(ARMCC::AL));
1505  } else if (Subtarget.hasMVEIntegerOps()) {
1506  BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1507  .addFrameIndex(FI)
1508  .addMemOperand(MMO);
1509  } else {
1510  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1511  .addFrameIndex(FI)
1512  .add(predOps(ARMCC::AL))
1513  .addMemOperand(MMO);
1514  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1515  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1516  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1517  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1518  if (Register::isPhysicalRegister(DestReg))
1519  MIB.addReg(DestReg, RegState::ImplicitDefine);
1520  }
1521  } else
1522  llvm_unreachable("Unknown reg class!");
1523  break;
1524  case 64:
1525  if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1526  Subtarget.hasMVEIntegerOps()) {
1527  BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1528  .addFrameIndex(FI)
1529  .addMemOperand(MMO);
1530  } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1531  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1532  .addFrameIndex(FI)
1533  .add(predOps(ARMCC::AL))
1534  .addMemOperand(MMO);
1535  MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1536  MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1537  MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1538  MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1539  MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1540  MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1541  MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1542  MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1543  if (Register::isPhysicalRegister(DestReg))
1544  MIB.addReg(DestReg, RegState::ImplicitDefine);
1545  } else
1546  llvm_unreachable("Unknown reg class!");
1547  break;
1548  default:
1549  llvm_unreachable("Unknown regclass!");
1550  }
1551 }
1552 
1553 unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1554  int &FrameIndex) const {
1555  switch (MI.getOpcode()) {
1556  default: break;
1557  case ARM::LDRrs:
1558  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1559  if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1560  MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1561  MI.getOperand(3).getImm() == 0) {
1562  FrameIndex = MI.getOperand(1).getIndex();
1563  return MI.getOperand(0).getReg();
1564  }
1565  break;
1566  case ARM::LDRi12:
1567  case ARM::t2LDRi12:
1568  case ARM::tLDRspi:
1569  case ARM::VLDRD:
1570  case ARM::VLDRS:
1571  case ARM::VLDR_P0_off:
1572  case ARM::MVE_VLDRWU32:
1573  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1574  MI.getOperand(2).getImm() == 0) {
1575  FrameIndex = MI.getOperand(1).getIndex();
1576  return MI.getOperand(0).getReg();
1577  }
1578  break;
1579  case ARM::VLD1q64:
1580  case ARM::VLD1d8TPseudo:
1581  case ARM::VLD1d16TPseudo:
1582  case ARM::VLD1d32TPseudo:
1583  case ARM::VLD1d64TPseudo:
1584  case ARM::VLD1d8QPseudo:
1585  case ARM::VLD1d16QPseudo:
1586  case ARM::VLD1d32QPseudo:
1587  case ARM::VLD1d64QPseudo:
1588  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1589  FrameIndex = MI.getOperand(1).getIndex();
1590  return MI.getOperand(0).getReg();
1591  }
1592  break;
1593  case ARM::VLDMQIA:
1594  if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1595  FrameIndex = MI.getOperand(1).getIndex();
1596  return MI.getOperand(0).getReg();
1597  }
1598  break;
1599  case ARM::MQQPRLoad:
1600  case ARM::MQQQQPRLoad:
1601  if (MI.getOperand(1).isFI()) {
1602  FrameIndex = MI.getOperand(1).getIndex();
1603  return MI.getOperand(0).getReg();
1604  }
1605  break;
1606  }
1607 
1608  return 0;
1609 }
1610 
1611 bool ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1612  int &FrameIndex) const {
1613  SmallVector<const MachineMemOperand *, 1> Accesses;
1614  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1615  Accesses.size() == 1) {
1616  FrameIndex =
1617  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1618  ->getFrameIndex();
1619  return true;
1620  }
1621  return false;
1622 }
1623 
1624 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1625 /// depending on whether the result is used.
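/// For example, a 16-byte Thumb-2 MEMCPY with scratch registers r4-r7 is
/// roughly expanded to "t2LDMIA_UPD rSrc!, {r4-r7}" followed by
/// "t2STMIA_UPD rDst!, {r4-r7}"; the non-writeback LDMIA/STMIA forms are
/// used when the updated pointers are dead.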
1626 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1627  bool isThumb1 = Subtarget.isThumb1Only();
1628  bool isThumb2 = Subtarget.isThumb2();
1629  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1630 
1631  DebugLoc dl = MI->getDebugLoc();
1632  MachineBasicBlock *BB = MI->getParent();
1633 
1634  MachineInstrBuilder LDM, STM;
1635  if (isThumb1 || !MI->getOperand(1).isDead()) {
1636  MachineOperand LDWb(MI->getOperand(1));
1637  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1638  : isThumb1 ? ARM::tLDMIA_UPD
1639  : ARM::LDMIA_UPD))
1640  .add(LDWb);
1641  } else {
1642  LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1643  }
1644 
1645  if (isThumb1 || !MI->getOperand(0).isDead()) {
1646  MachineOperand STWb(MI->getOperand(0));
1647  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1648  : isThumb1 ? ARM::tSTMIA_UPD
1649  : ARM::STMIA_UPD))
1650  .add(STWb);
1651  } else {
1652  STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1653  }
1654 
1655  MachineOperand LDBase(MI->getOperand(3));
1656  LDM.add(LDBase).add(predOps(ARMCC::AL));
1657 
1658  MachineOperand STBase(MI->getOperand(2));
1659  STM.add(STBase).add(predOps(ARMCC::AL));
1660 
1661  // Sort the scratch registers into ascending order.
1662  const TargetRegisterInfo &TRI = getRegisterInfo();
1663  SmallVector<unsigned, 6> ScratchRegs;
1664  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
1665  ScratchRegs.push_back(MI->getOperand(I).getReg());
1666  llvm::sort(ScratchRegs,
1667  [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1668  return TRI.getEncodingValue(Reg1) <
1669  TRI.getEncodingValue(Reg2);
1670  });
1671 
1672  for (const auto &Reg : ScratchRegs) {
1673  LDM.addReg(Reg, RegState::Define);
1674  STM.addReg(Reg, RegState::Kill);
1675  }
1676 
1677  BB->erase(MI);
1678 }
1679 
1680 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1681  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1682  expandLoadStackGuard(MI);
1683  MI.getParent()->erase(MI);
1684  return true;
1685  }
1686 
1687  if (MI.getOpcode() == ARM::MEMCPY) {
1688  expandMEMCPY(MI);
1689  return true;
1690  }
1691 
1692  // This hook gets to expand COPY instructions before they become
1693  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1694  // widened to VMOVD. We prefer the VMOVD when possible because it may be
1695  // changed into a VORR that can go down the NEON pipeline.
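 // For example, "%s0 = VMOVS %s2" becomes "%d0 = VMOVD %d1", with %d1 marked
 // undef and an implicit read of %s2 added so only the live lane is treated
 // as a real use.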
1696  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1697  return false;
1698 
1699  // Look for a copy between even S-registers. That is where we keep floats
1700  // when using NEON v2f32 instructions for f32 arithmetic.
1701  Register DstRegS = MI.getOperand(0).getReg();
1702  Register SrcRegS = MI.getOperand(1).getReg();
1703  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1704  return false;
1705 
1706  const TargetRegisterInfo *TRI = &getRegisterInfo();
1707  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1708  &ARM::DPRRegClass);
1709  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1710  &ARM::DPRRegClass);
1711  if (!DstRegD || !SrcRegD)
1712  return false;
1713 
1714  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1715  // legal if the COPY already defines the full DstRegD, and it isn't a
1716  // sub-register insertion.
1717  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1718  return false;
1719 
1720  // A dead copy shouldn't show up here, but reject it just in case.
1721  if (MI.getOperand(0).isDead())
1722  return false;
1723 
1724  // All clear, widen the COPY.
1725  LLVM_DEBUG(dbgs() << "widening: " << MI);
1726  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1727 
1728  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1729  // or some other super-register.
1730  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1731  if (ImpDefIdx != -1)
1732  MI.removeOperand(ImpDefIdx);
1733 
1734  // Change the opcode and operands.
1735  MI.setDesc(get(ARM::VMOVD));
1736  MI.getOperand(0).setReg(DstRegD);
1737  MI.getOperand(1).setReg(SrcRegD);
1738  MIB.add(predOps(ARMCC::AL));
1739 
1740  // We are now reading SrcRegD instead of SrcRegS. This may upset the
1741  // register scavenger and machine verifier, so we need to indicate that we
1742  // are reading an undefined value from SrcRegD, but a proper value from
1743  // SrcRegS.
1744  MI.getOperand(1).setIsUndef();
1745  MIB.addReg(SrcRegS, RegState::Implicit);
1746 
1747  // SrcRegD may actually contain an unrelated value in the ssub_1
1748  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1749  if (MI.getOperand(1).isKill()) {
1750  MI.getOperand(1).setIsKill(false);
1751  MI.addRegisterKilled(SrcRegS, TRI, true);
1752  }
1753 
1754  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1755  return true;
1756 }
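// Hedged illustration (not part of the source): the widening above turns a
// COPY between even S-registers into a D-register VMOVD, roughly
//
//   $s0 = COPY killed $s2
//     ==>
//   $d0 = VMOVD undef $d1, 14 /* ARMCC::AL */, $noreg, implicit killed $s2
//
// The source D-register is marked undef because only its ssub_0 half is known
// to hold a defined value; the extra implicit use of the original S-register
// keeps the register scavenger and machine verifier informed, as described in
// the comments above. The register names here are only an example.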
1757 
1758 /// Create a copy of a const pool value. Update CPI to the new index and return
1759 /// the label UID.
1760 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1761  MachineConstantPool *MCP = MF.getConstantPool();
1762  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1763 
1764  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1765  assert(MCPE.isMachineConstantPoolEntry() &&
1766  "Expecting a machine constantpool entry!");
1767  ARMConstantPoolValue *ACPV =
1768  static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1769 
1770  unsigned PCLabelId = AFI->createPICLabelUId();
1771  ARMConstantPoolValue *NewCPV = nullptr;
1772 
1773  // FIXME: The below assumes PIC relocation model and that the function
1774  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1775  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1776  // instructions, so that's probably OK, but is PIC always correct when
1777  // we get here?
1778  if (ACPV->isGlobalValue())
1779  NewCPV = ARMConstantPoolConstant::Create(
1780  cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1781  4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1782  else if (ACPV->isExtSymbol())
1783  NewCPV = ARMConstantPoolSymbol::
1784  Create(MF.getFunction().getContext(),
1785  cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1786  else if (ACPV->isBlockAddress())
1787  NewCPV = ARMConstantPoolConstant::
1788  Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1789  ARMCP::CPBlockAddress, 4);
1790  else if (ACPV->isLSDA())
1791  NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1792  ARMCP::CPLSDA, 4);
1793  else if (ACPV->isMachineBasicBlock())
1794  NewCPV = ARMConstantPoolMBB::
1795  Create(MF.getFunction().getContext(),
1796  cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1797  else
1798  llvm_unreachable("Unexpected ARM constantpool value type!!");
1799  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1800  return PCLabelId;
1801 }
1802 
1803 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1804  MachineBasicBlock::iterator I,
1805  Register DestReg, unsigned SubIdx,
1806  const MachineInstr &Orig,
1807  const TargetRegisterInfo &TRI) const {
1808  unsigned Opcode = Orig.getOpcode();
1809  switch (Opcode) {
1810  default: {
1811  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1812  MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1813  MBB.insert(I, MI);
1814  break;
1815  }
1816  case ARM::tLDRpci_pic:
1817  case ARM::t2LDRpci_pic: {
1818  MachineFunction &MF = *MBB.getParent();
1819  unsigned CPI = Orig.getOperand(1).getIndex();
1820  unsigned PCLabelId = duplicateCPV(MF, CPI);
1821  BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1822  .addConstantPoolIndex(CPI)
1823  .addImm(PCLabelId)
1824  .cloneMemRefs(Orig);
1825  break;
1826  }
1827  }
1828 }
1829 
1830 MachineInstr &
1831 ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1832  MachineBasicBlock::iterator InsertBefore,
1833  const MachineInstr &Orig) const {
1834  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1835  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1836  for (;;) {
1837  switch (I->getOpcode()) {
1838  case ARM::tLDRpci_pic:
1839  case ARM::t2LDRpci_pic: {
1840  MachineFunction &MF = *MBB.getParent();
1841  unsigned CPI = I->getOperand(1).getIndex();
1842  unsigned PCLabelId = duplicateCPV(MF, CPI);
1843  I->getOperand(1).setIndex(CPI);
1844  I->getOperand(2).setImm(PCLabelId);
1845  break;
1846  }
1847  }
1848  if (!I->isBundledWithSucc())
1849  break;
1850  ++I;
1851  }
1852  return Cloned;
1853 }
1854 
1855 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1856  const MachineInstr &MI1,
1857  const MachineRegisterInfo *MRI) const {
1858  unsigned Opcode = MI0.getOpcode();
1859  if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1860  Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1861  Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1862  Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1863  Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1864  Opcode == ARM::t2MOV_ga_pcrel) {
1865  if (MI1.getOpcode() != Opcode)
1866  return false;
1867  if (MI0.getNumOperands() != MI1.getNumOperands())
1868  return false;
1869 
1870  const MachineOperand &MO0 = MI0.getOperand(1);
1871  const MachineOperand &MO1 = MI1.getOperand(1);
1872  if (MO0.getOffset() != MO1.getOffset())
1873  return false;
1874 
1875  if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1876  Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1877  Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1878  Opcode == ARM::t2MOV_ga_pcrel)
1879  // Ignore the PC labels.
1880  return MO0.getGlobal() == MO1.getGlobal();
1881 
1882  const MachineFunction *MF = MI0.getParent()->getParent();
1883  const MachineConstantPool *MCP = MF->getConstantPool();
1884  int CPI0 = MO0.getIndex();
1885  int CPI1 = MO1.getIndex();
1886  const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1887  const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1888  bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1889  bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1890  if (isARMCP0 && isARMCP1) {
1891  ARMConstantPoolValue *ACPV0 =
1892  static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1893  ARMConstantPoolValue *ACPV1 =
1894  static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1895  return ACPV0->hasSameValue(ACPV1);
1896  } else if (!isARMCP0 && !isARMCP1) {
1897  return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1898  }
1899  return false;
1900  } else if (Opcode == ARM::PICLDR) {
1901  if (MI1.getOpcode() != Opcode)
1902  return false;
1903  if (MI0.getNumOperands() != MI1.getNumOperands())
1904  return false;
1905 
1906  Register Addr0 = MI0.getOperand(1).getReg();
1907  Register Addr1 = MI1.getOperand(1).getReg();
1908  if (Addr0 != Addr1) {
1909  if (!MRI || !Register::isVirtualRegister(Addr0) ||
1910  !Register::isVirtualRegister(Addr1))
1911  return false;
1912 
1913  // This assumes SSA form.
1914  MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1915  MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1916  // Check if the loaded values, e.g. a constant pool entry or a global
1917  // address, are the same.
1918  if (!produceSameValue(*Def0, *Def1, MRI))
1919  return false;
1920  }
1921 
1922  for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1923  // %12 = PICLDR %11, 0, 14, %noreg
1924  const MachineOperand &MO0 = MI0.getOperand(i);
1925  const MachineOperand &MO1 = MI1.getOperand(i);
1926  if (!MO0.isIdenticalTo(MO1))
1927  return false;
1928  }
1929  return true;
1930  }
1931 
1932  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1933 }
1934 
1935 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1936 /// determine if two loads are loading from the same base address. It should
1937 /// only return true if the base pointers are the same and the only difference
1938 /// between the two addresses is the offset. It also returns the offsets by
1939 /// reference.
1940 ///
1941 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1942 /// is permanently disabled.
1943 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1944  int64_t &Offset1,
1945  int64_t &Offset2) const {
1946  // Don't worry about Thumb: just ARM and Thumb2.
1947  if (Subtarget.isThumb1Only()) return false;
1948 
1949  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1950  return false;
1951 
1952  switch (Load1->getMachineOpcode()) {
1953  default:
1954  return false;
1955  case ARM::LDRi12:
1956  case ARM::LDRBi12:
1957  case ARM::LDRD:
1958  case ARM::LDRH:
1959  case ARM::LDRSB:
1960  case ARM::LDRSH:
1961  case ARM::VLDRD:
1962  case ARM::VLDRS:
1963  case ARM::t2LDRi8:
1964  case ARM::t2LDRBi8:
1965  case ARM::t2LDRDi8:
1966  case ARM::t2LDRSHi8:
1967  case ARM::t2LDRi12:
1968  case ARM::t2LDRBi12:
1969  case ARM::t2LDRSHi12:
1970  break;
1971  }
1972 
1973  switch (Load2->getMachineOpcode()) {
1974  default:
1975  return false;
1976  case ARM::LDRi12:
1977  case ARM::LDRBi12:
1978  case ARM::LDRD:
1979  case ARM::LDRH:
1980  case ARM::LDRSB:
1981  case ARM::LDRSH:
1982  case ARM::VLDRD:
1983  case ARM::VLDRS:
1984  case ARM::t2LDRi8:
1985  case ARM::t2LDRBi8:
1986  case ARM::t2LDRSHi8:
1987  case ARM::t2LDRi12:
1988  case ARM::t2LDRBi12:
1989  case ARM::t2LDRSHi12:
1990  break;
1991  }
1992 
1993  // Check if base addresses and chain operands match.
1994  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1995  Load1->getOperand(4) != Load2->getOperand(4))
1996  return false;
1997 
1998  // Index should be Reg0.
1999  if (Load1->getOperand(3) != Load2->getOperand(3))
2000  return false;
2001 
2002  // Determine the offsets.
2003  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
2004  isa<ConstantSDNode>(Load2->getOperand(1))) {
2005  Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
2006  Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
2007  return true;
2008  }
2009 
2010  return false;
2011 }
2012 
2013 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
2014 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
2015 /// be scheduled together. On some targets if two loads are loading from
2016 /// addresses in the same cache line, it's better if they are scheduled
2017 /// together. This function takes two integers that represent the load offsets
2018 /// from the common base address. It returns true if it decides it's desirable
2019 /// to schedule the two loads together. "NumLoads" is the number of loads that
2020 /// have already been scheduled after Load1.
2021 ///
2022 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
2023 /// is permanently disabled.
2024 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
2025  int64_t Offset1, int64_t Offset2,
2026  unsigned NumLoads) const {
2027  // Don't worry about Thumb: just ARM and Thumb2.
2028  if (Subtarget.isThumb1Only()) return false;
2029 
2030  assert(Offset2 > Offset1);
2031 
2032  if ((Offset2 - Offset1) / 8 > 64)
2033  return false;
2034 
2035  // Check if the machine opcodes are different. If they are different then
2036  // we consider them not to be loading from the same base address, EXCEPT in
2037  // the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
2038  // In that case they are considered to be the same because they are just
2039  // different encoding forms of the same basic instruction.
2040  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
2041  !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
2042  Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
2043  (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
2044  Load2->getMachineOpcode() == ARM::t2LDRBi8)))
2045  return false; // FIXME: overly conservative?
2046 
2047  // Four loads in a row should be sufficient.
2048  if (NumLoads >= 3)
2049  return false;
2050 
2051  return true;
2052 }
2053 
2054 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2055  const MachineBasicBlock *MBB,
2056  const MachineFunction &MF) const {
2057  // Debug info is never a scheduling boundary. It's necessary to be explicit
2058  // due to the special treatment of IT instructions below, otherwise a
2059  // dbg_value followed by an IT will result in the IT instruction being
2060  // considered a scheduling hazard, which is wrong. It should be the actual
2061  // instruction preceding the dbg_value instruction(s), just like it is
2062  // when debug info is not present.
2063  if (MI.isDebugInstr())
2064  return false;
2065 
2066  // Terminators and labels can't be scheduled around.
2067  if (MI.isTerminator() || MI.isPosition())
2068  return true;
2069 
2070  // INLINEASM_BR can jump to another block
2071  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
2072  return true;
2073 
2074  if (isSEHInstruction(MI))
2075  return true;
2076 
2077  // Treat the start of the IT block as a scheduling boundary, but schedule
2078  // t2IT along with all instructions following it.
2079  // FIXME: This is a big hammer. But the alternative is to add all potential
2080  // true and anti dependencies to IT block instructions as implicit operands
2081  // to the t2IT instruction. The added compile time and complexity does not
2082  // seem worth it.
2083  MachineBasicBlock::const_iterator I = MI;
2084  // Make sure to skip any debug instructions
2085  while (++I != MBB->end() && I->isDebugInstr())
2086  ;
2087  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
2088  return true;
2089 
2090  // Don't attempt to schedule around any instruction that defines
2091  // a stack-oriented pointer, as it's unlikely to be profitable. This
2092  // saves compile time, because it doesn't require every single
2093  // stack slot reference to depend on the instruction that does the
2094  // modification.
2095  // Calls don't actually change the stack pointer, even if they have imp-defs.
2096  // No ARM calling conventions change the stack pointer. (X86 calling
2097  // conventions sometimes do).
2098  if (!MI.isCall() && MI.definesRegister(ARM::SP))
2099  return true;
2100 
2101  return false;
2102 }
2103 
2104 bool ARMBaseInstrInfo::
2105 isProfitableToIfCvt(MachineBasicBlock &MBB,
2106  unsigned NumCycles, unsigned ExtraPredCycles,
2107  BranchProbability Probability) const {
2108  if (!NumCycles)
2109  return false;
2110 
2111  // If we are optimizing for size, see if the branch in the predecessor can be
2112  // lowered to cbn?z by the constant island lowering pass, and return false if
2113  // so. This results in a shorter instruction sequence.
2114  if (MBB.getParent()->getFunction().hasOptSize()) {
2115  MachineBasicBlock *Pred = *MBB.pred_begin();
2116  if (!Pred->empty()) {
2117  MachineInstr *LastMI = &*Pred->rbegin();
2118  if (LastMI->getOpcode() == ARM::t2Bcc) {
2119  const TargetRegisterInfo *TRI = &getRegisterInfo();
2120  MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
2121  if (CmpMI)
2122  return false;
2123  }
2124  }
2125  }
2126  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
2127  MBB, 0, 0, Probability);
2128 }
2129 
2130 bool ARMBaseInstrInfo::
2131 isProfitableToIfCvt(MachineBasicBlock &TBB,
2132  unsigned TCycles, unsigned TExtra,
2133  MachineBasicBlock &FBB,
2134  unsigned FCycles, unsigned FExtra,
2135  BranchProbability Probability) const {
2136  if (!TCycles)
2137  return false;
2138 
2139  // In Thumb code we often end up trading one branch for an IT block, and
2140  // if we are cloning, the extra instructions can increase code size. Prevent
2141  // blocks with multiple predecessors from being if-converted to avoid this
2142  // cloning.
2143  if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
2144  if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
2145  return false;
2146  }
2147 
2148  // Attempt to estimate the relative costs of predication versus branching.
2149  // Here we scale up each component of UnpredCost to avoid precision issues when
2150  // scaling TCycles/FCycles by Probability.
2151  const unsigned ScalingUpFactor = 1024;
2152 
2153  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
2154  unsigned UnpredCost;
2155  if (!Subtarget.hasBranchPredictor()) {
2156  // When we don't have a branch predictor it's always cheaper to not take a
2157  // branch than take it, so we have to take that into account.
2158  unsigned NotTakenBranchCost = 1;
2159  unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2160  unsigned TUnpredCycles, FUnpredCycles;
2161  if (!FCycles) {
2162  // Triangle: TBB is the fallthrough
2163  TUnpredCycles = TCycles + NotTakenBranchCost;
2164  FUnpredCycles = TakenBranchCost;
2165  } else {
2166  // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2167  TUnpredCycles = TCycles + TakenBranchCost;
2168  FUnpredCycles = FCycles + NotTakenBranchCost;
2169  // The branch at the end of FBB will disappear when it's predicated, so
2170  // discount it from PredCost.
2171  PredCost -= 1 * ScalingUpFactor;
2172  }
2173  // The total cost is the cost of each path scaled by their probabilities
2174  unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2175  unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2176  UnpredCost = TUnpredCost + FUnpredCost;
2177  // When predicating, assume that the first IT can be folded away but later
2178  // ones cost one cycle each.
2179  if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2180  PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2181  }
2182  } else {
2183  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2184  unsigned FUnpredCost =
2185  Probability.getCompl().scale(FCycles * ScalingUpFactor);
2186  UnpredCost = TUnpredCost + FUnpredCost;
2187  UnpredCost += 1 * ScalingUpFactor; // The branch itself
2188  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2189  }
2190 
2191  return PredCost <= UnpredCost;
2192 }
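// A standalone sketch (not part of this file) of the diamond cost comparison
// above for a core without a branch predictor. All inputs below (cycle counts,
// misprediction penalty, branch probability) are made-up example values, not
// data from any real subtarget.
#include <cstdio>

int main() {
  const unsigned ScalingUpFactor = 1024;
  unsigned TCycles = 3, FCycles = 2, TExtra = 0, FExtra = 0;
  double TakenProb = 0.5;          // stand-in for BranchProbability::scale()
  unsigned NotTakenBranchCost = 1;
  unsigned TakenBranchCost = 4;    // example misprediction penalty

  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
  // Diamond: TBB is branched to, FBB is the fallthrough; FBB's own branch
  // disappears once it is predicated.
  unsigned TUnpredCycles = TCycles + TakenBranchCost;
  unsigned FUnpredCycles = FCycles + NotTakenBranchCost;
  PredCost -= 1 * ScalingUpFactor;

  unsigned TUnpredCost =
      (unsigned)(TakenProb * TUnpredCycles * ScalingUpFactor);
  unsigned FUnpredCost =
      (unsigned)((1.0 - TakenProb) * FUnpredCycles * ScalingUpFactor);
  unsigned UnpredCost = TUnpredCost + FUnpredCost;

  std::printf("PredCost=%u UnpredCost=%u -> %s\n", PredCost, UnpredCost,
              PredCost <= UnpredCost ? "if-convert" : "keep the branches");
}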
2193 
2194 unsigned
2195 ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2196  unsigned NumInsts) const {
2197  // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2198  // ARM has a condition code field in every predicable instruction, using it
2199  // doesn't change code size.
2200  if (!Subtarget.isThumb2())
2201  return 0;
2202 
2203  // It's possible that the size of the IT is restricted to a single block.
2204  unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2205  return divideCeil(NumInsts, MaxInsts) * 2;
2206 }
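// A minimal sketch (not part of this file) of the Thumb2 size estimate above:
// each IT instruction is 2 bytes and can predicate at most MaxInsts
// instructions (1 when IT blocks are restricted, otherwise 4).
#include <cstdio>

static unsigned divideCeilExample(unsigned Num, unsigned Den) {
  return (Num + Den - 1) / Den;
}

int main() {
  const unsigned MaxInsts = 4;                       // !restrictIT()
  for (unsigned NumInsts : {1u, 4u, 5u, 9u})
    std::printf("%u predicated insts -> %u extra bytes\n", NumInsts,
                divideCeilExample(NumInsts, MaxInsts) * 2);
}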
2207 
2208 unsigned
2209 ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2210  // If this branch is likely to be folded into the comparison to form a
2211  // CB(N)Z, then removing it won't reduce code size at all, because that will
2212  // just replace the CB(N)Z with a CMP.
2213  if (MI.getOpcode() == ARM::t2Bcc &&
2214  findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2215  return 0;
2216 
2217  unsigned Size = getInstSizeInBytes(MI);
2218 
2219  // For Thumb2, all branches are 32-bit instructions during the if conversion
2220  // pass, but may be replaced with 16-bit instructions during size reduction.
2221  // Since the branches considered by if conversion tend to be forward branches
2222  // over small basic blocks, they are very likely to be in range for the
2223  // narrow instructions, so we assume the final code size will be half what it
2224  // currently is.
2225  if (Subtarget.isThumb2())
2226  Size /= 2;
2227 
2228  return Size;
2229 }
2230 
2231 bool
2232 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2233  MachineBasicBlock &FMBB) const {
2234  // Reduce false anti-dependencies to let the target's out-of-order execution
2235  // engine do its thing.
2236  return Subtarget.isProfitableToUnpredicate();
2237 }
2238 
2239 /// getInstrPredicate - If instruction is predicated, returns its predicate
2240 /// condition, otherwise returns AL. It also returns the condition code
2241 /// register by reference.
2242 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2243  Register &PredReg) {
2244  int PIdx = MI.findFirstPredOperandIdx();
2245  if (PIdx == -1) {
2246  PredReg = 0;
2247  return ARMCC::AL;
2248  }
2249 
2250  PredReg = MI.getOperand(PIdx+1).getReg();
2251  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2252 }
2253 
2254 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2255  if (Opc == ARM::B)
2256  return ARM::Bcc;
2257  if (Opc == ARM::tB)
2258  return ARM::tBcc;
2259  if (Opc == ARM::t2B)
2260  return ARM::t2Bcc;
2261 
2262  llvm_unreachable("Unknown unconditional branch opcode!");
2263 }
2264 
2265 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2266  bool NewMI,
2267  unsigned OpIdx1,
2268  unsigned OpIdx2) const {
2269  switch (MI.getOpcode()) {
2270  case ARM::MOVCCr:
2271  case ARM::t2MOVCCr: {
2272  // MOVCC can be commuted by inverting the condition.
2273  Register PredReg;
2274  ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2275  // MOVCC AL can't be inverted. Shouldn't happen.
2276  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2277  return nullptr;
2278  MachineInstr *CommutedMI =
2279  TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2280  if (!CommutedMI)
2281  return nullptr;
2282  // After swapping the MOVCC operands, also invert the condition.
2283  CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2284  .setImm(ARMCC::getOppositeCondition(CC));
2285  return CommutedMI;
2286  }
2287  }
2288  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2289 }
2290 
2291 /// Identify instructions that can be folded into a MOVCC instruction, and
2292 /// return the defining instruction.
2293 MachineInstr *
2294 ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2295  const TargetInstrInfo *TII) const {
2296  if (!Reg.isVirtual())
2297  return nullptr;
2298  if (!MRI.hasOneNonDBGUse(Reg))
2299  return nullptr;
2300  MachineInstr *MI = MRI.getVRegDef(Reg);
2301  if (!MI)
2302  return nullptr;
2303  // Check if MI can be predicated and folded into the MOVCC.
2304  if (!isPredicable(*MI))
2305  return nullptr;
2306  // Check if MI has any non-dead defs or physreg uses. This also detects
2307  // predicated instructions which will be reading CPSR.
2308  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2309  // Reject frame index operands, PEI can't handle the predicated pseudos.
2310  if (MO.isFI() || MO.isCPI() || MO.isJTI())
2311  return nullptr;
2312  if (!MO.isReg())
2313  continue;
2314  // MI can't have any tied operands, that would conflict with predication.
2315  if (MO.isTied())
2316  return nullptr;
2317  if (Register::isPhysicalRegister(MO.getReg()))
2318  return nullptr;
2319  if (MO.isDef() && !MO.isDead())
2320  return nullptr;
2321  }
2322  bool DontMoveAcrossStores = true;
2323  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2324  return nullptr;
2325  return MI;
2326 }
2327 
2328 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2329  SmallVectorImpl<MachineOperand> &Cond,
2330  unsigned &TrueOp, unsigned &FalseOp,
2331  bool &Optimizable) const {
2332  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2333  "Unknown select instruction");
2334  // MOVCC operands:
2335  // 0: Def.
2336  // 1: True use.
2337  // 2: False use.
2338  // 3: Condition code.
2339  // 4: CPSR use.
2340  TrueOp = 1;
2341  FalseOp = 2;
2342  Cond.push_back(MI.getOperand(3));
2343  Cond.push_back(MI.getOperand(4));
2344  // We can always fold a def.
2345  Optimizable = true;
2346  return false;
2347 }
2348 
2349 MachineInstr *
2350 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2351  SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2352  bool PreferFalse) const {
2353  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2354  "Unknown select instruction");
2355  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2356  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2357  bool Invert = !DefMI;
2358  if (!DefMI)
2359  DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2360  if (!DefMI)
2361  return nullptr;
2362 
2363  // Find new register class to use.
2364  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2365  MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2366  Register DestReg = MI.getOperand(0).getReg();
2367  const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2368  const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2369  if (!MRI.constrainRegClass(DestReg, FalseClass))
2370  return nullptr;
2371  if (!MRI.constrainRegClass(DestReg, TrueClass))
2372  return nullptr;
2373 
2374  // Create a new predicated version of DefMI.
2375  // Rfalse is the first use.
2376  MachineInstrBuilder NewMI =
2377  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2378 
2379  // Copy all the DefMI operands, excluding its (null) predicate.
2380  const MCInstrDesc &DefDesc = DefMI->getDesc();
2381  for (unsigned i = 1, e = DefDesc.getNumOperands();
2382  i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
2383  NewMI.add(DefMI->getOperand(i));
2384 
2385  unsigned CondCode = MI.getOperand(3).getImm();
2386  if (Invert)
2387  NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2388  else
2389  NewMI.addImm(CondCode);
2390  NewMI.add(MI.getOperand(4));
2391 
2392  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2393  if (NewMI->hasOptionalDef())
2394  NewMI.add(condCodeOp());
2395 
2396  // The output register value when the predicate is false is an implicit
2397  // register operand tied to the first def.
2398  // The tie makes the register allocator ensure the FalseReg is allocated the
2399  // same register as operand 0.
2400  FalseReg.setImplicit();
2401  NewMI.add(FalseReg);
2402  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2403 
2404  // Update SeenMIs set: register newly created MI and erase removed DefMI.
2405  SeenMIs.insert(NewMI);
2406  SeenMIs.erase(DefMI);
2407 
2408  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2409  // DefMI would be invalid when transferred inside the loop. Checking for a
2410  // loop is expensive, but at least remove kill flags if they are in different
2411  // BBs.
2412  if (DefMI->getParent() != MI.getParent())
2413  NewMI->clearKillInfo();
2414 
2415  // The caller will erase MI, but not DefMI.
2416  DefMI->eraseFromParent();
2417  return NewMI;
2418 }
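// Hedged illustration (not part of the source) of the select folding above:
// the single def of one MOVCC input is predicated directly and the other
// input is tied to the result, e.g.
//
//   %2 = t2ADDri %1, 1, 14 /* al */, $noreg, $noreg
//   %3 = t2MOVCCr %0, %2, 0 /* eq */, $cpsr
//     ==>
//   %3 = t2ADDri %1, 1, 0 /* eq */, $cpsr, $noreg, implicit %0 (tied to %3)
//
// The operand order shown is only indicative; the code above appends the
// predicate, the optional CPSR def, and finally the tied false-value operand.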
2419 
2420 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2421 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2422 /// def operand.
2423 ///
2424 /// This will go away once we can teach tblgen how to set the optional CPSR def
2425 /// operand itself.
2426 struct AddSubFlagsOpcodePair {
2427  uint16_t PseudoOpc;
2428  uint16_t MachineOpc;
2429 };
2430 
2431 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2432  {ARM::ADDSri, ARM::ADDri},
2433  {ARM::ADDSrr, ARM::ADDrr},
2434  {ARM::ADDSrsi, ARM::ADDrsi},
2435  {ARM::ADDSrsr, ARM::ADDrsr},
2436 
2437  {ARM::SUBSri, ARM::SUBri},
2438  {ARM::SUBSrr, ARM::SUBrr},
2439  {ARM::SUBSrsi, ARM::SUBrsi},
2440  {ARM::SUBSrsr, ARM::SUBrsr},
2441 
2442  {ARM::RSBSri, ARM::RSBri},
2443  {ARM::RSBSrsi, ARM::RSBrsi},
2444  {ARM::RSBSrsr, ARM::RSBrsr},
2445 
2446  {ARM::tADDSi3, ARM::tADDi3},
2447  {ARM::tADDSi8, ARM::tADDi8},
2448  {ARM::tADDSrr, ARM::tADDrr},
2449  {ARM::tADCS, ARM::tADC},
2450 
2451  {ARM::tSUBSi3, ARM::tSUBi3},
2452  {ARM::tSUBSi8, ARM::tSUBi8},
2453  {ARM::tSUBSrr, ARM::tSUBrr},
2454  {ARM::tSBCS, ARM::tSBC},
2455  {ARM::tRSBS, ARM::tRSB},
2456  {ARM::tLSLSri, ARM::tLSLri},
2457 
2458  {ARM::t2ADDSri, ARM::t2ADDri},
2459  {ARM::t2ADDSrr, ARM::t2ADDrr},
2460  {ARM::t2ADDSrs, ARM::t2ADDrs},
2461 
2462  {ARM::t2SUBSri, ARM::t2SUBri},
2463  {ARM::t2SUBSrr, ARM::t2SUBrr},
2464  {ARM::t2SUBSrs, ARM::t2SUBrs},
2465 
2466  {ARM::t2RSBSri, ARM::t2RSBri},
2467  {ARM::t2RSBSrs, ARM::t2RSBrs},
2468 };
2469 
2470 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2471  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
2472  if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
2473  return AddSubFlagsOpcodeMap[i].MachineOpc;
2474  return 0;
2475 }
2476 
2477 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2478  MachineBasicBlock::iterator &MBBI,
2479  const DebugLoc &dl, Register DestReg,
2480  Register BaseReg, int NumBytes,
2481  ARMCC::CondCodes Pred, Register PredReg,
2482  const ARMBaseInstrInfo &TII,
2483  unsigned MIFlags) {
2484  if (NumBytes == 0 && DestReg != BaseReg) {
2485  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2486  .addReg(BaseReg, RegState::Kill)
2487  .add(predOps(Pred, PredReg))
2488  .add(condCodeOp())
2489  .setMIFlags(MIFlags);
2490  return;
2491  }
2492 
2493  bool isSub = NumBytes < 0;
2494  if (isSub) NumBytes = -NumBytes;
2495 
2496  while (NumBytes) {
2497  unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2498  unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2499  assert(ThisVal && "Didn't extract field correctly");
2500 
2501  // We will handle these bits from offset, clear them.
2502  NumBytes &= ~ThisVal;
2503 
2504  assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2505 
2506  // Build the new ADD / SUB.
2507  unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2508  BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2509  .addReg(BaseReg, RegState::Kill)
2510  .addImm(ThisVal)
2511  .add(predOps(Pred, PredReg))
2512  .add(condCodeOp())
2513  .setMIFlags(MIFlags);
2514  BaseReg = DestReg;
2515  }
2516 }
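// A simplified, self-contained sketch (not the ARM_AM helpers) of how the loop
// above splits an offset into ARM modified-immediate chunks: each ADD/SUB
// consumes an 8-bit value rotated right by an even amount, and the loop runs
// until nothing is left. The example offset is arbitrary.
#include <cstdint>
#include <cstdio>

static uint32_t rotr32(uint32_t V, unsigned Amt) {
  Amt &= 31;
  return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
}

// Even rotate-right amount that brings the lowest set bits into the low byte.
static unsigned soImmValRotate(uint32_t V) {
  if ((V & ~255u) == 0)
    return 0;
  unsigned TZ = 0;
  while (!(V & (1u << TZ)))
    ++TZ;
  return (32 - (TZ & ~1u)) & 31;
}

int main() {
  uint32_t NumBytes = 0x12345;
  while (NumBytes) {
    unsigned RotAmt = soImmValRotate(NumBytes);
    uint32_t ThisVal = NumBytes & rotr32(0xFF, RotAmt);
    std::printf("emit ADD/SUB of %#x\n", (unsigned)ThisVal);
    NumBytes &= ~ThisVal;      // clear the bits this instruction handles
  }
}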
2517 
2518 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2519  MachineFunction &MF, MachineInstr *MI,
2520  unsigned NumBytes) {
2521  // This optimisation potentially adds lots of load and store
2522  // micro-operations; it is really only a benefit to code size.
2523  if (!Subtarget.hasMinSize())
2524  return false;
2525 
2526  // If only one register is pushed/popped, LLVM can use an LDR/STR
2527  // instead. We can't modify those so make sure we're dealing with an
2528  // instruction we understand.
2529  bool IsPop = isPopOpcode(MI->getOpcode());
2530  bool IsPush = isPushOpcode(MI->getOpcode());
2531  if (!IsPush && !IsPop)
2532  return false;
2533 
2534  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2535  MI->getOpcode() == ARM::VLDMDIA_UPD;
2536  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2537  MI->getOpcode() == ARM::tPOP ||
2538  MI->getOpcode() == ARM::tPOP_RET;
2539 
2540  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2541  MI->getOperand(1).getReg() == ARM::SP)) &&
2542  "trying to fold sp update into non-sp-updating push/pop");
2543 
2544  // The VFP push & pop act on D-registers, so we can only correctly fold in an
2545  // adjustment that is a multiple of 8 bytes. Similarly, GPRs are 4 bytes each.
2546  // Don't try if this is violated.
2547  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2548  return false;
2549 
2550  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2551  // pred) so the list starts at 4. Thumb1 starts after the predicate.
2552  int RegListIdx = IsT1PushPop ? 2 : 4;
2553 
2554  // Calculate the space we'll need in terms of registers.
2555  unsigned RegsNeeded;
2556  const TargetRegisterClass *RegClass;
2557  if (IsVFPPushPop) {
2558  RegsNeeded = NumBytes / 8;
2559  RegClass = &ARM::DPRRegClass;
2560  } else {
2561  RegsNeeded = NumBytes / 4;
2562  RegClass = &ARM::GPRRegClass;
2563  }
2564 
2565  // We're going to have to strip all list operands off before
2566  // re-adding them since the order matters, so save the existing ones
2567  // for later.
2568  SmallVector<MachineOperand, 4> RegList;
2569 
2570  // We're also going to need the first register transferred by this
2571  // instruction, which won't necessarily be the first register in the list.
2572  unsigned FirstRegEnc = -1;
2573 
2574  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2575  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2576  MachineOperand &MO = MI->getOperand(i);
2577  RegList.push_back(MO);
2578 
2579  if (MO.isReg() && !MO.isImplicit() &&
2580  TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2581  FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2582  }
2583 
2584  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2585 
2586  // Now try to find enough space in the reglist to allocate NumBytes.
2587  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2588  --CurRegEnc) {
2589  unsigned CurReg = RegClass->getRegister(CurRegEnc);
2590  if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2591  continue;
2592  if (!IsPop) {
2593  // Pushing any register is completely harmless; mark the register involved
2594  // as undef since we don't care about its value and must not restore it
2595  // during stack unwinding.
2596  RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2597  false, false, true));
2598  --RegsNeeded;
2599  continue;
2600  }
2601 
2602  // However, we can only pop an extra register if it's not live. For
2603  // registers live within the function we might clobber a return value
2604  // register; the other way a register can be live here is if it's
2605  // callee-saved.
2606  if (isCalleeSavedRegister(CurReg, CSRegs) ||
2607  MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2608  MachineBasicBlock::LQR_Dead) {
2609  // VFP pops don't allow holes in the register list, so any skip is fatal
2610  // for our transformation. GPR pops do, so we should just keep looking.
2611  if (IsVFPPushPop)
2612  return false;
2613  else
2614  continue;
2615  }
2616 
2617  // Mark the unimportant registers as <def,dead> in the POP.
2618  RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2619  true));
2620  --RegsNeeded;
2621  }
2622 
2623  if (RegsNeeded > 0)
2624  return false;
2625 
2626  // Finally we know we can profitably perform the optimisation so go
2627  // ahead: strip all existing registers off and add them back again
2628  // in the right order.
2629  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2630  MI->removeOperand(i);
2631 
2632  // Add the complete list back in.
2633  MachineInstrBuilder MIB(MF, &*MI);
2634  for (const MachineOperand &MO : llvm::reverse(RegList))
2635  MIB.add(MO);
2636 
2637  return true;
2638 }
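// A back-of-the-envelope sketch (not part of this file) of the sizing rule
// above: an SP adjustment is folded by widening the push/pop register list, so
// it must be a multiple of the per-register width (8 bytes for VFP D-regs, 4
// for GPRs), and the quotient is how many extra registers are needed.
#include <cstdio>

int main() {
  struct { unsigned NumBytes; bool IsVFP; } Tests[] = {
      {8, false}, {12, false}, {16, true}, {12, true}};
  for (const auto &T : Tests) {
    unsigned Width = T.IsVFP ? 8u : 4u;
    if (T.NumBytes % Width != 0)
      std::printf("%u bytes (%s): cannot fold\n", T.NumBytes,
                  T.IsVFP ? "VFP" : "GPR");
    else
      std::printf("%u bytes (%s): need %u extra register(s)\n", T.NumBytes,
                  T.IsVFP ? "VFP" : "GPR", T.NumBytes / Width);
  }
}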
2639 
2640 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2641  Register FrameReg, int &Offset,
2642  const ARMBaseInstrInfo &TII) {
2643  unsigned Opcode = MI.getOpcode();
2644  const MCInstrDesc &Desc = MI.getDesc();
2645  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2646  bool isSub = false;
2647 
2648  // Memory operands in inline assembly always use AddrMode2.
2649  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2650  AddrMode = ARMII::AddrMode2;
2651 
2652  if (Opcode == ARM::ADDri) {
2653  Offset += MI.getOperand(FrameRegIdx+1).getImm();
2654  if (Offset == 0) {
2655  // Turn it into a move.
2656  MI.setDesc(TII.get(ARM::MOVr));
2657  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2658  MI.removeOperand(FrameRegIdx+1);
2659  Offset = 0;
2660  return true;
2661  } else if (Offset < 0) {
2662  Offset = -Offset;
2663  isSub = true;
2664  MI.setDesc(TII.get(ARM::SUBri));
2665  }
2666 
2667  // Common case: small offset, fits into instruction.
2668  if (ARM_AM::getSOImmVal(Offset) != -1) {
2669  // Replace the FrameIndex with sp / fp
2670  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2671  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2672  Offset = 0;
2673  return true;
2674  }
2675 
2676  // Otherwise, pull as much of the immediate into this ADDri/SUBri
2677  // as possible.
2678  unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2679  unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2680 
2681  // We will handle these bits from offset, clear them.
2682  Offset &= ~ThisImmVal;
2683 
2684  // Get the properly encoded SOImmVal field.
2685  assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2686  "Bit extraction didn't work?");
2687  MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2688  } else {
2689  unsigned ImmIdx = 0;
2690  int InstrOffs = 0;
2691  unsigned NumBits = 0;
2692  unsigned Scale = 1;
2693  switch (AddrMode) {
2694  case ARMII::AddrMode_i12:
2695  ImmIdx = FrameRegIdx + 1;
2696  InstrOffs = MI.getOperand(ImmIdx).getImm();
2697  NumBits = 12;
2698  break;
2699  case ARMII::AddrMode2:
2700  ImmIdx = FrameRegIdx+2;
2701  InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2702  if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2703  InstrOffs *= -1;
2704  NumBits = 12;
2705  break;
2706  case ARMII::AddrMode3:
2707  ImmIdx = FrameRegIdx+2;
2708  InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2709  if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2710  InstrOffs *= -1;
2711  NumBits = 8;
2712  break;
2713  case ARMII::AddrMode4:
2714  case ARMII::AddrMode6:
2715  // Can't fold any offset even if it's zero.
2716  return false;
2717  case ARMII::AddrMode5:
2718  ImmIdx = FrameRegIdx+1;
2719  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2720  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2721  InstrOffs *= -1;
2722  NumBits = 8;
2723  Scale = 4;
2724  break;
2725  case ARMII::AddrMode5FP16:
2726  ImmIdx = FrameRegIdx+1;
2727  InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2728  if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2729  InstrOffs *= -1;
2730  NumBits = 8;
2731  Scale = 2;
2732  break;
2733  case ARMII::AddrModeT2_i7:
2734  case ARMII::AddrModeT2_i7s2:
2735  case ARMII::AddrModeT2_i7s4:
2736  ImmIdx = FrameRegIdx+1;
2737  InstrOffs = MI.getOperand(ImmIdx).getImm();
2738  NumBits = 7;
2739  Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2740  AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2741  break;
2742  default:
2743  llvm_unreachable("Unsupported addressing mode!");
2744  }
2745 
2746  Offset += InstrOffs * Scale;
2747  assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2748  if (Offset < 0) {
2749  Offset = -Offset;
2750  isSub = true;
2751  }
2752 
2753  // Attempt to fold the address computation if the opcode has offset bits.
2754  if (NumBits > 0) {
2755  // Common case: small offset, fits into instruction.
2756  MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2757  int ImmedOffset = Offset / Scale;
2758  unsigned Mask = (1 << NumBits) - 1;
2759  if ((unsigned)Offset <= Mask * Scale) {
2760  // Replace the FrameIndex with sp
2761  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2762  // FIXME: When addrmode2 goes away, this will simplify (like the
2763  // T2 version), as the LDR.i12 versions don't need the encoding
2764  // tricks for the offset value.
2765  if (isSub) {
2766  if (AddrMode == ARMII::AddrMode_i12 || AddrMode == ARMII::AddrMode2)
2767  ImmedOffset = -ImmedOffset;
2768  else
2769  ImmedOffset |= 1 << NumBits;
2770  }
2771  ImmOp.ChangeToImmediate(ImmedOffset);
2772  Offset = 0;
2773  return true;
2774  }
2775 
2776  // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2777  ImmedOffset = ImmedOffset & Mask;
2778  if (isSub) {
2779  if (AddrMode == ARMII::AddrMode_i12 || AddrMode == ARMII::AddrMode2)
2780  ImmedOffset = -ImmedOffset;
2781  else
2782  ImmedOffset |= 1 << NumBits;
2783  }
2784  ImmOp.ChangeToImmediate(ImmedOffset);
2785  Offset &= ~(Mask*Scale);
2786  }
2787  }
2788 
2789  Offset = (isSub) ? -Offset : Offset;
2790  return Offset == 0;
2791 }
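// A minimal sketch (not the ARM_AM encoding helpers) of the immediate handling
// above for an addressing mode with NumBits of offset and a separate add/sub
// flag (e.g. the 8-bit AddrMode3 case): offsets that fit are encoded directly,
// and subtraction is signalled by an extra bit instead of a negative value.
#include <cstdio>

int main() {
  const unsigned NumBits = 8, Scale = 1;
  for (int Offset : {100, -100, 5000}) {
    bool IsSub = Offset < 0;
    unsigned Abs = IsSub ? (unsigned)-Offset : (unsigned)Offset;
    unsigned Mask = (1u << NumBits) - 1;
    if (Abs <= Mask * Scale) {
      unsigned Field = Abs / Scale;
      if (IsSub)
        Field |= 1u << NumBits;   // the "sub" bit, as in the non-i12 modes
      std::printf("offset %d -> immediate field %#x\n", Offset, Field);
    } else {
      std::printf("offset %d does not fit in %u bits\n", Offset, NumBits);
    }
  }
}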
2792 
2793 /// analyzeCompare - For a comparison instruction, return the source registers
2794 /// in SrcReg and SrcReg2 if it has two register operands, and the value it
2795 /// compares against in CmpValue. Return true if the comparison instruction
2796 /// can be analyzed.
2797 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2798  Register &SrcReg2, int64_t &CmpMask,
2799  int64_t &CmpValue) const {
2800  switch (MI.getOpcode()) {
2801  default: break;
2802  case ARM::CMPri:
2803  case ARM::t2CMPri:
2804  case ARM::tCMPi8:
2805  SrcReg = MI.getOperand(0).getReg();
2806  SrcReg2 = 0;
2807  CmpMask = ~0;
2808  CmpValue = MI.getOperand(1).getImm();
2809  return true;
2810  case ARM::CMPrr:
2811  case ARM::t2CMPrr:
2812  case ARM::tCMPr:
2813  SrcReg = MI.getOperand(0).getReg();
2814  SrcReg2 = MI.getOperand(1).getReg();
2815  CmpMask = ~0;
2816  CmpValue = 0;
2817  return true;
2818  case ARM::TSTri:
2819  case ARM::t2TSTri:
2820  SrcReg = MI.getOperand(0).getReg();
2821  SrcReg2 = 0;
2822  CmpMask = MI.getOperand(1).getImm();
2823  CmpValue = 0;
2824  return true;
2825  }
2826 
2827  return false;
2828 }
2829 
2830 /// isSuitableForMask - Identify a suitable 'and' instruction that
2831 /// operates on the given source register and applies the same mask
2832 /// as a 'tst' instruction. Provide a limited look-through for copies.
2833 /// When successful, MI will hold the found instruction.
2834 static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2835  int CmpMask, bool CommonUse) {
2836  switch (MI->getOpcode()) {
2837  case ARM::ANDri:
2838  case ARM::t2ANDri:
2839  if (CmpMask != MI->getOperand(2).getImm())
2840  return false;
2841  if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2842  return true;
2843  break;
2844  }
2845 
2846  return false;
2847 }
2848 
2849 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2850 /// the condition code if we modify the instructions such that flags are
2851 /// set by ADD(a,b,X).
2852 inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2853  switch (CC) {
2854  default: return ARMCC::AL;
2855  case ARMCC::HS: return ARMCC::LO;
2856  case ARMCC::LO: return ARMCC::HS;
2857  case ARMCC::VS: return ARMCC::VS;
2858  case ARMCC::VC: return ARMCC::VC;
2859  }
2860 }
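// A hedged worked example (not part of the source) of why the mapping above
// swaps HS and LO: when the compared value x was produced by x = a + b, the
// subtraction x - a performed by CMP borrows exactly when the ADD carried out,
// so the carry flag the CMP would set is the inverse of the ADD's carry. The
// values below are arbitrary.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t a = 0xFFFFFFF0u, b = 0x20u;
  uint32_t x = a + b;                 // wraps around, so ADD sets C
  bool AddCarry = x < a;              // carry out of a + b
  bool CmpCarry = x >= a;             // "no borrow" flag from CMP(x, a)
  std::printf("ADD carry = %d, CMP(x, a) carry = %d\n", AddCarry, CmpCarry);
}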
2861 
2862 /// isRedundantFlagInstr - check whether the first instruction, whose only
2863 /// purpose is to update flags, can be made redundant.
2864 /// CMPrr can be made redundant by SUBrr if the operands are the same.
2865 /// CMPri can be made redundant by SUBri if the operands are the same.
2866 /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2867 /// This function can be extended later on.
2868 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2869  Register SrcReg, Register SrcReg2,
2870  int64_t ImmValue,
2871  const MachineInstr *OI,
2872  bool &IsThumb1) {
2873  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2874  (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2875  ((OI->getOperand(1).getReg() == SrcReg &&
2876  OI->getOperand(2).getReg() == SrcReg2) ||
2877  (OI->getOperand(1).getReg() == SrcReg2 &&
2878  OI->getOperand(2).getReg() == SrcReg))) {
2879  IsThumb1 = false;
2880  return true;
2881  }
2882 
2883  if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2884  ((OI->getOperand(2).getReg() == SrcReg &&
2885  OI->getOperand(3).getReg() == SrcReg2) ||
2886  (OI->getOperand(2).getReg() == SrcReg2 &&
2887  OI->getOperand(3).getReg() == SrcReg))) {
2888  IsThumb1 = true;
2889  return true;
2890  }
2891 
2892  if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2893  (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2894  OI->getOperand(1).getReg() == SrcReg &&
2895  OI->getOperand(2).getImm() == ImmValue) {
2896  IsThumb1 = false;
2897  return true;
2898  }
2899 
2900  if (CmpI->getOpcode() == ARM::tCMPi8 &&
2901  (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2902  OI->getOperand(2).getReg() == SrcReg &&
2903  OI->getOperand(3).getImm() == ImmValue) {
2904  IsThumb1 = true;
2905  return true;
2906  }
2907 
2908  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2909  (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2910  OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2911  OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2912  OI->getOperand(0).getReg() == SrcReg &&
2913  OI->getOperand(1).getReg() == SrcReg2) {
2914  IsThumb1 = false;
2915  return true;
2916  }
2917 
2918  if (CmpI->getOpcode() == ARM::tCMPr &&
2919  (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2920  OI->getOpcode() == ARM::tADDrr) &&
2921  OI->getOperand(0).getReg() == SrcReg &&
2922  OI->getOperand(2).getReg() == SrcReg2) {
2923  IsThumb1 = true;
2924  return true;
2925  }
2926 
2927  return false;
2928 }
2929 
2930 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2931  switch (MI->getOpcode()) {
2932  default: return false;
2933  case ARM::tLSLri:
2934  case ARM::tLSRri:
2935  case ARM::tLSLrr:
2936  case ARM::tLSRrr:
2937  case ARM::tSUBrr:
2938  case ARM::tADDrr:
2939  case ARM::tADDi3:
2940  case ARM::tADDi8:
2941  case ARM::tSUBi3:
2942  case ARM::tSUBi8:
2943  case ARM::tMUL:
2944  case ARM::tADC:
2945  case ARM::tSBC:
2946  case ARM::tRSB:
2947  case ARM::tAND:
2948  case ARM::tORR:
2949  case ARM::tEOR:
2950  case ARM::tBIC:
2951  case ARM::tMVN:
2952  case ARM::tASRri:
2953  case ARM::tASRrr:
2954  case ARM::tROR:
2955  IsThumb1 = true;
2956  LLVM_FALLTHROUGH;
2957  case ARM::RSBrr:
2958  case ARM::RSBri:
2959  case ARM::RSCrr:
2960  case ARM::RSCri:
2961  case ARM::ADDrr:
2962  case ARM::ADDri:
2963  case ARM::ADCrr:
2964  case ARM::ADCri:
2965  case ARM::SUBrr:
2966  case ARM::SUBri:
2967  case ARM::SBCrr:
2968  case ARM::SBCri:
2969  case ARM::t2RSBri:
2970  case ARM::t2ADDrr:
2971  case ARM::t2ADDri:
2972  case ARM::t2ADCrr:
2973  case ARM::t2ADCri:
2974  case ARM::t2SUBrr:
2975  case ARM::t2SUBri:
2976  case ARM::t2SBCrr:
2977  case ARM::t2SBCri:
2978  case ARM::ANDrr:
2979  case ARM::ANDri:
2980  case ARM::t2ANDrr:
2981  case ARM::t2ANDri:
2982  case ARM::ORRrr:
2983  case ARM::ORRri:
2984  case ARM::t2ORRrr:
2985  case ARM::t2ORRri:
2986  case ARM::EORrr:
2987  case ARM::EORri:
2988  case ARM::t2EORrr:
2989  case ARM::t2EORri:
2990  case ARM::t2LSRri:
2991  case ARM::t2LSRrr:
2992  case ARM::t2LSLri:
2993  case ARM::t2LSLrr:
2994  return true;
2995  }
2996 }
2997 
2998 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
2999 /// comparison into one that sets the zero bit in the flags register.
3000 /// Remove a redundant Compare instruction if an earlier instruction can set the
3001 /// flags in the same way as Compare.
3002 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
3003 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
3004 /// condition code of instructions which use the flags.
3005 bool ARMBaseInstrInfo::optimizeCompareInstr(
3006  MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
3007  int64_t CmpValue, const MachineRegisterInfo *MRI) const {
3008  // Get the unique definition of SrcReg.
3009  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3010  if (!MI) return false;
3011 
3012  // Masked compares sometimes use the same register as the corresponding 'and'.
3013  if (CmpMask != ~0) {
3014  if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
3015  MI = nullptr;
3016  for (MachineRegisterInfo::use_instr_iterator
3017  UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
3018  UI != UE; ++UI) {
3019  if (UI->getParent() != CmpInstr.getParent())
3020  continue;
3021  MachineInstr *PotentialAND = &*UI;
3022  if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
3023  isPredicated(*PotentialAND))
3024  continue;
3025  MI = PotentialAND;
3026  break;
3027  }
3028  if (!MI) return false;
3029  }
3030  }
3031 
3032  // Get ready to iterate backward from CmpInstr.
3033  MachineBasicBlock::iterator I = CmpInstr, E = MI,
3034  B = CmpInstr.getParent()->begin();
3035 
3036  // Early exit if CmpInstr is at the beginning of the BB.
3037  if (I == B) return false;
3038 
3039  // There are two possible candidates which can be changed to set CPSR:
3040  // One is MI, the other is a SUB or ADD instruction.
3041  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
3042  // ADDr[ri](r1, r2, X).
3043  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
3044  MachineInstr *SubAdd = nullptr;
3045  if (SrcReg2 != 0)
3046  // MI is not a candidate for CMPrr.
3047  MI = nullptr;
3048  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
3049  // Conservatively refuse to convert an instruction which isn't in the same
3050  // BB as the comparison.
3051  // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
3052  // Thus we cannot return here.
3053  if (CmpInstr.getOpcode() == ARM::CMPri ||
3054  CmpInstr.getOpcode() == ARM::t2CMPri ||
3055  CmpInstr.getOpcode() == ARM::tCMPi8)
3056  MI = nullptr;
3057  else
3058  return false;
3059  }
3060 
3061  bool IsThumb1 = false;
3062  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
3063  return false;
3064 
3065  // We also want to do this peephole for cases like this: if (a*b == 0),
3066  // and optimise away the CMP instruction from the generated code sequence:
3067  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
3068  // resulting from the select instruction, but these MOVS instructions for
3069  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
3070  // However, if we only have MOVS instructions in between the CMP and the
3071  // other instruction (the MULS in this example), then the CPSR is dead so we
3072  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
3073  // reordering and then continue the analysis hoping we can eliminate the
3074  // CMP. This peephole works on the vregs, so is still in SSA form. As a
3075  // consequence, the movs won't redefine/kill the MUL operands which would
3076  // make this reordering illegal.
3077  const TargetRegisterInfo *TRI = &getRegisterInfo();
3078  if (MI && IsThumb1) {
3079  --I;
3080  if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
3081  bool CanReorder = true;
3082  for (; I != E; --I) {
3083  if (I->getOpcode() != ARM::tMOVi8) {
3084  CanReorder = false;
3085  break;
3086  }
3087  }
3088  if (CanReorder) {
3089  MI = MI->removeFromParent();
3090  E = CmpInstr;
3091  CmpInstr.getParent()->insert(E, MI);
3092  }
3093  }
3094  I = CmpInstr;
3095  E = MI;
3096  }
3097 
3098  // Check that CPSR isn't set between the comparison instruction and the one we
3099  // want to change. At the same time, search for SubAdd.
3100  bool SubAddIsThumb1 = false;
3101  do {
3102  const MachineInstr &Instr = *--I;
3103 
3104  // Check whether CmpInstr can be made redundant by the current instruction.
3105  if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
3106  SubAddIsThumb1)) {
3107  SubAdd = &*I;
3108  break;
3109  }
3110 
3111  // Allow E (which was initially MI) to be SubAdd but do not search before E.
3112  if (I == E)
3113  break;
3114 
3115  if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
3116  Instr.readsRegister(ARM::CPSR, TRI))
3117  // This instruction modifies or uses CPSR after the one we want to
3118  // change. We can't do this transformation.
3119  return false;
3120 
3121  if (I == B) {
3122  // In some cases, we scan the use-list of an instruction for an AND;
3123  // that AND is in the same BB, but may not be scheduled before the
3124  // corresponding TST. In that case, bail out.
3125  //
3126  // FIXME: We could try to reschedule the AND.
3127  return false;
3128  }
3129  } while (true);
3130 
3131  // Return false if no candidates exist.
3132  if (!MI && !SubAdd)
3133  return false;
3134 
3135  // If we found a SubAdd, use it as it will be closer to the CMP
3136  if (SubAdd) {
3137  MI = SubAdd;
3138  IsThumb1 = SubAddIsThumb1;
3139  }
3140 
3141  // We can't use a predicated instruction - it doesn't always write the flags.
3142  if (isPredicated(*MI))
3143  return false;
3144 
3145  // Scan forward for the use of CPSR
3146  // When checking against MI: if it's a conditional code that requires
3147  // checking of the V bit or C bit, then this is not safe to do.
3148  // It is safe to remove CmpInstr if CPSR is redefined or killed.
3149  // If we are done with the basic block, we need to check whether CPSR is
3150  // live-out.
3151  SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3152  OperandsToUpdate;
3153  bool isSafe = false;
3154  I = CmpInstr;
3155  E = CmpInstr.getParent()->end();
3156  while (!isSafe && ++I != E) {
3157  const MachineInstr &Instr = *I;
3158  for (unsigned IO = 0, EO = Instr.getNumOperands();
3159  !isSafe && IO != EO; ++IO) {
3160  const MachineOperand &MO = Instr.getOperand(IO);
3161  if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3162  isSafe = true;
3163  break;
3164  }
3165  if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3166  continue;
3167  if (MO.isDef()) {
3168  isSafe = true;
3169  break;
3170  }
3171  // Condition code is after the operand before CPSR except for VSELs.
3172  ARMCC::CondCodes CC;
3173  bool IsInstrVSel = true;
3174  switch (Instr.getOpcode()) {
3175  default:
3176  IsInstrVSel = false;
3177  CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3178  break;
3179  case ARM::VSELEQD:
3180  case ARM::VSELEQS:
3181  case ARM::VSELEQH:
3182  CC = ARMCC::EQ;
3183  break;
3184  case ARM::VSELGTD:
3185  case ARM::VSELGTS:
3186  case ARM::VSELGTH:
3187  CC = ARMCC::GT;
3188  break;
3189  case ARM::VSELGED:
3190  case ARM::VSELGES:
3191  case ARM::VSELGEH:
3192  CC = ARMCC::GE;
3193  break;
3194  case ARM::VSELVSD:
3195  case ARM::VSELVSS:
3196  case ARM::VSELVSH:
3197  CC = ARMCC::VS;
3198  break;
3199  }
3200 
3201  if (SubAdd) {
3202  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3203  // on CMP needs to be updated to be based on SUB.
3204  // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3205  // needs to be modified.
3206  // Push the condition code operands to OperandsToUpdate.
3207  // If it is safe to remove CmpInstr, the condition code of these
3208  // operands will be modified.
3209  unsigned Opc = SubAdd->getOpcode();
3210  bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3211  Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3212  Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3213  Opc == ARM::tSUBi8;
3214  unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3215  if (!IsSub ||
3216  (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3217  SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3218  // VSel doesn't support condition code update.
3219  if (IsInstrVSel)
3220  return false;
3221  // Ensure we can swap the condition.
3222  ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3223  if (NewCC == ARMCC::AL)
3224  return false;
3225  OperandsToUpdate.push_back(
3226  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3227  }
3228  } else {
3229  // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3230  switch (CC) {
3231  case ARMCC::EQ: // Z
3232  case ARMCC::NE: // Z
3233  case ARMCC::MI: // N
3234  case ARMCC::PL: // N
3235  case ARMCC::AL: // none
3236  // CPSR can be used multiple times, we should continue.
3237  break;
3238  case ARMCC::HS: // C
3239  case ARMCC::LO: // C
3240  case ARMCC::VS: // V
3241  case ARMCC::VC: // V
3242  case ARMCC::HI: // C Z
3243  case ARMCC::LS: // C Z
3244  case ARMCC::GE: // N V
3245  case ARMCC::LT: // N V
3246  case ARMCC::GT: // Z N V
3247  case ARMCC::LE: // Z N V
3248  // The instruction uses the V bit or C bit which is not safe.
3249  return false;
3250  }
3251  }
3252  }
3253  }
3254 
3255  // If CPSR is not killed nor re-defined, we should check whether it is
3256  // live-out. If it is live-out, do not optimize.
3257  if (!isSafe) {
3258  MachineBasicBlock *MBB = CmpInstr.getParent();
3259  for (MachineBasicBlock *Succ : MBB->successors())
3260  if (Succ->isLiveIn(ARM::CPSR))
3261  return false;
3262  }
3263 
3264  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3265  // set CPSR so this is represented as an explicit output)
3266  if (!IsThumb1) {
3267  MI->getOperand(5).setReg(ARM::CPSR);
3268  MI->getOperand(5).setIsDef(true);
3269  }
3270  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3271  CmpInstr.eraseFromParent();
3272 
3273  // Modify the condition code of operands in OperandsToUpdate.
3274  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3275  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3276  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
3277  OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3278 
3279  MI->clearRegisterDeads(ARM::CPSR);
3280 
3281  return true;
3282 }
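// Hedged illustration (not part of the source) of the rewrite performed by
// optimizeCompareInstr(): the defining instruction is switched to its
// flag-setting form and the explicit compare against zero is erased, e.g.
//
//   %1 = t2SUBri %0, 7, 14 /* al */, $noreg, $noreg
//   t2CMPri %1, 0, 14 /* al */, $noreg, implicit-def $cpsr
//   t2Bcc %bb.1, 0 /* eq */, $cpsr
//     ==>
//   %1 = t2SUBri %0, 7, 14 /* al */, $noreg, def $cpsr
//   t2Bcc %bb.1, 0 /* eq */, $cpsr
//
// As checked above, only users of the N and Z bits survive when the compared
// value is zero; any user of C or V makes the transformation bail out.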
3283 
3284 bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3285  // Do not sink MI if it might be used to optimize a redundant compare.
3286  // We heuristically only look at the instruction immediately following MI to
3287  // avoid potentially searching the entire basic block.
3288  if (isPredicated(MI))
3289  return true;
3290  MachineBasicBlock::const_iterator Next = &MI;
3291  ++Next;
3292  Register SrcReg, SrcReg2;
3293  int64_t CmpMask, CmpValue;
3294  bool IsThumb1;
3295  if (Next != MI.getParent()->end() &&
3296  analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3297  isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3298  return false;
3299  return true;
3300 }
3301 
3302 bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3303  Register Reg,
3304  MachineRegisterInfo *MRI) const {
3305  // Fold large immediates into add, sub, or, xor.
3306  unsigned DefOpc = DefMI.getOpcode();
3307  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
3308  return false;
3309  if (!DefMI.getOperand(1).isImm())
3310  // Could be t2MOVi32imm @xx
3311  return false;
3312 
3313  if (!MRI->hasOneNonDBGUse(Reg))
3314  return false;
3315 
3316  const MCInstrDesc &DefMCID = DefMI.getDesc();
3317  if (DefMCID.hasOptionalDef()) {
3318  unsigned NumOps = DefMCID.getNumOperands();
3319  const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3320  if (MO.getReg() == ARM::CPSR && !MO.isDead())
3321  // If DefMI defines CPSR and it is not dead, it's obviously not safe
3322  // to delete DefMI.
3323  return false;
3324  }
3325 
3326  const MCInstrDesc &UseMCID = UseMI.getDesc();
3327  if (UseMCID.hasOptionalDef()) {
3328  unsigned NumOps = UseMCID.getNumOperands();
3329  if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3330  // If the instruction sets the flag, do not attempt this optimization
3331  // since it may change the semantics of the code.
3332  return false;
3333  }
3334 
3335  unsigned UseOpc = UseMI.getOpcode();
3336  unsigned NewUseOpc = 0;
3337  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3338  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3339  bool Commute = false;
3340  switch (UseOpc) {
3341  default: return false;
3342  case ARM::SUBrr:
3343  case ARM::ADDrr:
3344  case ARM::ORRrr:
3345  case ARM::EORrr:
3346  case ARM::t2SUBrr:
3347  case ARM::t2ADDrr:
3348  case ARM::t2ORRrr:
3349  case ARM::t2EORrr: {
3350  Commute = UseMI.getOperand(2).getReg() != Reg;
3351  switch (UseOpc) {
3352  default: break;
3353  case ARM::ADDrr:
3354  case ARM::SUBrr:
3355  if (UseOpc == ARM::SUBrr && Commute)
3356  return false;
3357 
3358  // ADD/SUB are special because they're essentially the same operation, so
3359  // we can handle a larger range of immediates.
3360  if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3361  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3362  else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3363  ImmVal = -ImmVal;
3364  NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3365  } else
3366  return false;
3367  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3368  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3369  break;
3370  case ARM::ORRrr:
3371  case ARM::EORrr:
3372  if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3373  return false;
3374  SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3375  SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3376  switch (UseOpc) {
3377  default: break;
3378  case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3379  case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3380  }
3381  break;
3382  case ARM::t2ADDrr:
3383  case ARM::t2SUBrr: {
3384  if (UseOpc == ARM::t2SUBrr && Commute)
3385  return false;
3386 
3387  // ADD/SUB are special because they're essentially the same operation, so
3388  // we can handle a larger range of immediates.
3389  const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3390  const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3391  const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3392  if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3393  NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3394  else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3395  ImmVal = -ImmVal;
3396  NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3397  } else
3398  return false;
3399  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3400  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3401  break;
3402  }
3403  case ARM::t2ORRrr:
3404  case ARM::t2EORrr:
3405  if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3406  return false;
3407  SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3408  SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3409  switch (UseOpc) {
3410  default: break;
3411  case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3412  case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3413  }
3414  break;
3415  }
3416  }
3417  }
3418 
3419  unsigned OpIdx = Commute ? 2 : 1;
3420  Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3421  bool isKill = UseMI.getOperand(OpIdx).isKill();
3422  const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3423  Register NewReg = MRI->createVirtualRegister(TRC);
3424  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3425  NewReg)
3426  .addReg(Reg1, getKillRegState(isKill))
3427  .addImm(SOImmValV1)
3428  .add(predOps(ARMCC::AL))
3429  .add(condCodeOp());
3430  UseMI.setDesc(get(NewUseOpc));
3431  UseMI.getOperand(1).setReg(NewReg);
3432  UseMI.getOperand(1).setIsKill();
3433  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3434  DefMI.eraseFromParent();
3435  // FIXME: t2ADDrr should be split, as different rules apply when writing to SP,
3436  // just as t2ADDri was split into [t2ADDri, t2ADDspImm].
3437  // Then the below code will not be needed, as the input/output register
3438  // classes will be rgpr or gprSP.
3439  // For now, we fix the UseMI operand explicitly here:
3440  switch(NewUseOpc){
3441  case ARM::t2ADDspImm:
3442  case ARM::t2SUBspImm:
3443  case ARM::t2ADDri:
3444  case ARM::t2SUBri:
3445  MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3446  }
3447  return true;
3448 }
3449 
3450 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3451  const MachineInstr &MI) {
3452  switch (MI.getOpcode()) {
3453  default: {
3454  const MCInstrDesc &Desc = MI.getDesc();
3455  int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3456  assert(UOps >= 0 && "bad # UOps");
3457  return UOps;
3458  }
3459 
3460  case ARM::LDRrs:
3461  case ARM::LDRBrs:
3462  case ARM::STRrs:
3463  case ARM::STRBrs: {
3464  unsigned ShOpVal = MI.getOperand(3).getImm();
3465  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3466  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3467  if (!isSub &&
3468  (ShImm == 0 ||
3469  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3470  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3471  return 1;
3472  return 2;
3473  }
3474 
3475  case ARM::LDRH:
3476  case ARM::STRH: {
3477  if (!MI.getOperand(2).getReg())
3478  return 1;
3479 
3480  unsigned ShOpVal = MI.getOperand(3).getImm();
3481  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3482  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3483  if (!isSub &&
3484  (ShImm == 0 ||
3485  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3486  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3487  return 1;
3488  return 2;
3489  }
3490 
3491  case ARM::LDRSB:
3492  case ARM::LDRSH:
3493  return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3494 
3495  case ARM::LDRSB_POST:
3496  case ARM::LDRSH_POST: {
3497  Register Rt = MI.getOperand(0).getReg();
3498  Register Rm = MI.getOperand(3).getReg();
3499  return (Rt == Rm) ? 4 : 3;
3500  }
3501 
3502  case ARM::LDR_PRE_REG:
3503  case ARM::LDRB_PRE_REG: {
3504  Register Rt = MI.getOperand(0).getReg();
3505  Register Rm = MI.getOperand(3).getReg();
3506  if (Rt == Rm)
3507  return 3;
3508  unsigned ShOpVal = MI.getOperand(4).getImm();
3509  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3510  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3511  if (!isSub &&
3512  (ShImm == 0 ||
3513  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3514  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3515  return 2;
3516  return 3;
3517  }
3518 
3519  case ARM::STR_PRE_REG:
3520  case ARM::STRB_PRE_REG: {
3521  unsigned ShOpVal = MI.getOperand(4).getImm();
3522  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3523  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3524  if (!isSub &&
3525  (ShImm == 0 ||
3526  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3527  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3528  return 2;
3529  return 3;
3530  }
3531 
3532  case ARM::LDRH_PRE:
3533  case ARM::STRH_PRE: {
3534  Register Rt = MI.getOperand(0).getReg();
3535  Register Rm = MI.getOperand(3).getReg();
3536  if (!Rm)
3537  return 2;
3538  if (Rt == Rm)
3539  return 3;
3540  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3541  }
3542 
3543  case ARM::LDR_POST_REG:
3544  case ARM::LDRB_POST_REG:
3545  case ARM::LDRH_POST: {
3546  Register Rt = MI.getOperand(0).getReg();
3547  Register Rm = MI.getOperand(3).getReg();
3548  return (Rt == Rm) ? 3 : 2;
3549  }
3550 
3551  case ARM::LDR_PRE_IMM:
3552  case ARM::LDRB_PRE_IMM:
3553  case ARM::LDR_POST_IMM:
3554  case ARM::LDRB_POST_IMM:
3555  case ARM::STRB_POST_IMM:
3556  case ARM::STRB_POST_REG:
3557  case ARM::STRB_PRE_IMM:
3558  case ARM::STRH_POST:
3559  case ARM::STR_POST_IMM:
3560  case ARM::STR_POST_REG:
3561  case ARM::STR_PRE_IMM:
3562  return 2;
3563 
3564  case ARM::LDRSB_PRE:
3565  case ARM::LDRSH_PRE: {
3566  Register Rm = MI.getOperand(3).getReg();
3567  if (Rm == 0)
3568  return 3;
3569  Register Rt = MI.getOperand(0).getReg();
3570  if (Rt == Rm)
3571  return 4;
3572  unsigned ShOpVal = MI.getOperand(4).getImm();
3573  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3574  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3575  if (!isSub &&
3576  (ShImm == 0 ||
3577  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3578  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3579  return 3;
3580  return 4;
3581  }
3582 
3583  case ARM::LDRD: {
3584  Register Rt = MI.getOperand(0).getReg();
3585  Register Rn = MI.getOperand(2).getReg();
3586  Register Rm = MI.getOperand(3).getReg();
3587  if (Rm)
3588  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3589  : 3;
3590  return (Rt == Rn) ? 3 : 2;
3591  }
3592 
3593  case ARM::STRD: {
3594  Register Rm = MI.getOperand(3).getReg();
3595  if (Rm)
3596  return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3597  : 3;
3598  return 2;
3599  }
3600 
3601  case ARM::LDRD_POST:
3602  case ARM::t2LDRD_POST:
3603  return 3;
3604 
3605  case ARM::STRD_POST:
3606  case ARM::t2STRD_POST:
3607  return 4;
3608 
3609  case ARM::LDRD_PRE: {
3610  Register Rt = MI.getOperand(0).getReg();
3611  Register Rn = MI.getOperand(3).getReg();
3612  Register Rm = MI.getOperand(4).getReg();
3613  if (Rm)
3614  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3615  : 4;
3616  return (Rt == Rn) ? 4 : 3;
3617  }
3618 
3619  case ARM::t2LDRD_PRE: {
3620  Register Rt = MI.getOperand(0).getReg();
3621  Register Rn = MI.getOperand(3).getReg();
3622  return (Rt == Rn) ? 4 : 3;
3623  }
3624 
3625  case ARM::STRD_PRE: {
3626  Register Rm = MI.getOperand(4).getReg();
3627  if (Rm)
3628  return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3629  : 4;
3630  return 3;
3631  }
3632 
3633  case ARM::t2STRD_PRE:
3634  return 3;
3635 
3636  case ARM::t2LDR_POST:
3637  case ARM::t2LDRB_POST:
3638  case ARM::t2LDRB_PRE:
3639  case ARM::t2LDRSBi12:
3640  case ARM::t2LDRSBi8:
3641  case ARM::t2LDRSBpci:
3642  case ARM::t2LDRSBs:
3643  case ARM::t2LDRH_POST:
3644  case ARM::t2LDRH_PRE:
3645  case ARM::t2LDRSBT:
3646  case ARM::t2LDRSB_POST:
3647  case ARM::t2LDRSB_PRE:
3648  case ARM::t2LDRSH_POST:
3649  case ARM::t2LDRSH_PRE:
3650  case ARM::t2LDRSHi12:
3651  case ARM::t2LDRSHi8:
3652  case ARM::t2LDRSHpci:
3653  case ARM::t2LDRSHs:
3654  return 2;
3655 
3656  case ARM::t2LDRDi8: {
3657  Register Rt = MI.getOperand(0).getReg();
3658  Register Rn = MI.getOperand(2).getReg();
3659  return (Rt == Rn) ? 3 : 2;
3660  }
3661 
3662  case ARM::t2STRB_POST:
3663  case ARM::t2STRB_PRE:
3664  case ARM::t2STRBs:
3665  case ARM::t2STRDi8:
3666  case ARM::t2STRH_POST:
3667  case ARM::t2STRH_PRE:
3668  case ARM::t2STRHs:
3669  case ARM::t2STR_POST:
3670  case ARM::t2STR_PRE:
3671  case ARM::t2STRs:
3672  return 2;
3673  }
3674 }
3675 
3676 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
3677 // can't be easily determined return 0 (missing MachineMemOperand).
3678 //
3679 // FIXME: The current MachineInstr design does not support relying on machine
3680 // mem operands to determine the width of a memory access. Instead, we expect
3681 // the target to provide this information based on the instruction opcode and
3682 // operands. However, using MachineMemOperand is the best solution now for
3683 // two reasons:
3684 //
3685 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3686 // operands. This is much more dangerous than using the MachineMemOperand
3687 // sizes because CodeGen passes can insert/remove optional machine operands. In
3688 // fact, it's totally incorrect for preRA passes and appears to be wrong for
3689 // postRA passes as well.
3690 //
3691 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3692 // machine model that calls this should handle the unknown (zero size) case.
3693 //
3694 // Long term, we should require a target hook that verifies MachineMemOperand
3695 // sizes during MC lowering. That target hook should be local to MC lowering
3696 // because we can't ensure that it is aware of other MI forms. Doing this will
3697 // ensure that MachineMemOperands are correctly propagated through all passes.
3698 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3699  unsigned Size = 0;
3700  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3701  E = MI.memoperands_end();
3702  I != E; ++I) {
3703  Size += (*I)->getSize();
3704  }
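  // Size now holds the total byte width of the instruction's memory operands;
  // every 32-bit word transferred accounts for 4 of those bytes.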
3705  // FIXME: The scheduler currently can't handle values larger than 16. But
3706  // the values can actually go up to 32 for floating-point load/store
3707  // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3708  // operations isn't right; we could end up with "extra" memory operands for
3709  // various reasons, like tail merge merging two memory operations.
3710  return std::min(Size / 4, 16U);
3711 }
3712 
3713 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3714  unsigned NumRegs) {
3715  unsigned UOps = 1 + NumRegs; // 1 for address computation.
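  // For instance, a tPOP_RET restoring four low registers plus pc is modeled
  // below as 1 + 5 + 2 = 8 uOps: the address computation, one uOp per register,
  // the base writeback, and the write to pc.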
3716  switch (Opc) {
3717  default:
3718  break;
3719  case ARM::VLDMDIA_UPD:
3720  case ARM::VLDMDDB_UPD:
3721  case ARM::VLDMSIA_UPD:
3722  case ARM::VLDMSDB_UPD:
3723  case ARM::VSTMDIA_UPD:
3724  case ARM::VSTMDDB_UPD:
3725  case ARM::VSTMSIA_UPD:
3726  case ARM::VSTMSDB_UPD:
3727  case ARM::LDMIA_UPD:
3728  case ARM::LDMDA_UPD:
3729  case ARM::LDMDB_UPD:
3730  case ARM::LDMIB_UPD:
3731  case ARM::STMIA_UPD:
3732  case ARM::STMDA_UPD:
3733  case ARM::STMDB_UPD:
3734  case ARM::STMIB_UPD:
3735  case ARM::tLDMIA_UPD:
3736  case ARM::tSTMIA_UPD:
3737  case ARM::t2LDMIA_UPD:
3738  case ARM::t2LDMDB_UPD:
3739  case ARM::t2STMIA_UPD:
3740  case ARM::t2STMDB_UPD:
3741  ++UOps; // One for base register writeback.
3742  break;
3743  case ARM::LDMIA_RET:
3744  case ARM::tPOP_RET:
3745  case ARM::t2LDMIA_RET:
3746  UOps += 2; // One for base reg wb, one for write to pc.
3747  break;
3748  }
3749  return UOps;
3750 }
3751 
3752 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3753  const MachineInstr &MI) const {
3754  if (!ItinData || ItinData->isEmpty())
3755  return 1;
3756 
3757  const MCInstrDesc &Desc = MI.getDesc();
3758  unsigned Class = Desc.getSchedClass();
3759  int ItinUOps = ItinData->getNumMicroOps(Class);
3760  if (ItinUOps >= 0) {
3761  if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3762  return getNumMicroOpsSwiftLdSt(ItinData, MI);
3763 
3764  return ItinUOps;
3765  }
3766 
3767  unsigned Opc = MI.getOpcode();
3768  switch (Opc) {
3769  default:
3770  llvm_unreachable("Unexpected multi-uops instruction!");
3771  case ARM::VLDMQIA:
3772  case ARM::VSTMQIA:
3773  return 2;
3774 
3775  // The number of uOps for a load / store multiple is determined by the number
3776  // of registers.
3777  //
3778  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3779  // same cycle. The scheduling for the first load / store must be done
3780  // separately by assuming the address is not 64-bit aligned.
3781  //
3782  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3783  // is not 64-bit aligned, the AGU takes an extra cycle. For VFP / NEON
3784  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
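  // For example, on Cortex-A9 a VLDM of 5 D registers is modeled as
  // (5 / 2) + (5 % 2) + 1 = 4 uOps, while 4 registers cost 3.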
3785  case ARM::VLDMDIA:
3786  case ARM::VLDMDIA_UPD:
3787  case ARM::VLDMDDB_UPD:
3788  case ARM::VLDMSIA:
3789  case ARM::VLDMSIA_UPD:
3790  case ARM::VLDMSDB_UPD:
3791  case ARM::VSTMDIA:
3792  case ARM::VSTMDIA_UPD:
3793  case ARM::VSTMDDB_UPD:
3794  case ARM::VSTMSIA:
3795  case ARM::VSTMSIA_UPD:
3796  case ARM::VSTMSDB_UPD: {
3797  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3798  return (NumRegs / 2) + (NumRegs % 2) + 1;
3799  }
3800 
3801  case ARM::LDMIA_RET:
3802  case ARM::LDMIA:
3803  case ARM::LDMDA:
3804  case ARM::LDMDB:
3805  case ARM::LDMIB:
3806  case ARM::LDMIA_UPD:
3807  case ARM::LDMDA_UPD:
3808  case ARM::LDMDB_UPD:
3809  case ARM::LDMIB_UPD:
3810  case ARM::STMIA:
3811  case ARM::STMDA:
3812  case ARM::STMDB:
3813  case ARM::STMIB:
3814  case ARM::STMIA_UPD:
3815  case ARM::STMDA_UPD:
3816  case ARM::STMDB_UPD:
3817  case ARM::STMIB_UPD:
3818  case ARM::tLDMIA:
3819  case ARM::tLDMIA_UPD:
3820  case ARM::tSTMIA_UPD:
3821  case ARM::tPOP_RET:
3822  case ARM::tPOP:
3823  case ARM::tPUSH:
3824  case ARM::t2LDMIA_RET:
3825  case ARM::t2LDMIA:
3826  case ARM::t2LDMDB:
3827  case ARM::t2LDMIA_UPD:
3828  case ARM::t2LDMDB_UPD:
3829  case ARM::t2STMIA:
3830  case ARM::t2STMDB:
3831  case ARM::t2STMIA_UPD:
3832  case ARM::t2STMDB_UPD: {
3833  unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3834  switch (Subtarget.getLdStMultipleTiming()) {
3835  case ARMSubtarget::SingleIssuePlusExtras:
3836  return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3837  case ARMSubtarget::SingleIssue:
3838  // Assume the worst.
3839  return NumRegs;
3840  case ARMSubtarget::DoubleIssue: {
3841  if (NumRegs < 4)
3842  return 2;
3843  // 4 registers would be issued: 2, 2.
3844  // 5 registers would be issued: 2, 2, 1.
3845  unsigned UOps = (NumRegs / 2);
3846  if (NumRegs % 2)
3847  ++UOps;
3848  return UOps;
3849  }
3850  case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3851  unsigned UOps = (NumRegs / 2);
3852  // If there is an odd number of registers or the access is not 64-bit aligned,
3853  // it takes an extra AGU (Address Generation Unit) cycle.
3854  if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3855  (*MI.memoperands_begin())->getAlign() < Align(8))
3856  ++UOps;
3857  return UOps;
3858  }
3859  }
3860  }
3861  }
3862  llvm_unreachable("Didn't find the number of microops");
3863 }
3864 
3865 int
3866 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3867  const MCInstrDesc &DefMCID,
3868  unsigned DefClass,
3869  unsigned DefIdx, unsigned DefAlign) const {
3870  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3871  if (RegNo <= 0)
3872  // Def is the address writeback.
3873  return ItinData->getOperandCycle(DefClass, DefIdx);
3874 
3875  int DefCycle;
3876  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3877  // (regno / 2) + (regno % 2) + 1
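  // e.g. the fifth register in the list (RegNo == 5) becomes available at
  // cycle 5 / 2 + 1 + 1 = 4.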
3878  DefCycle = RegNo / 2 + 1;
3879  if (RegNo % 2)
3880  ++DefCycle;
3881  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3882  DefCycle = RegNo;
3883  bool isSLoad = false;
3884 
3885  switch (DefMCID.getOpcode()) {
3886  default: break;
3887  case ARM::VLDMSIA:
3888  case ARM::VLDMSIA_UPD:
3889  case ARM::VLDMSDB_UPD:
3890  isSLoad = true;
3891  break;
3892  }
3893 
3894  // If there is an odd number of 'S' registers or the access is not 64-bit
3895  // aligned, it takes an extra cycle.
3896  if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3897  ++DefCycle;
3898  } else {
3899  // Assume the worst.
3900  DefCycle = RegNo + 2;
3901  }
3902 
3903  return DefCycle;
3904 }
3905 
3906 int
3907 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3908  const MCInstrDesc &DefMCID,
3909  unsigned DefClass,
3910  unsigned DefIdx, unsigned DefAlign) const {
3911  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3912  if (RegNo <= 0)
3913  // Def is the address writeback.
3914  return ItinData->getOperandCycle(DefClass, DefIdx);
3915 
3916  int DefCycle;
3917  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3918  // 4 registers would be issued: 1, 2, 1.
3919  // 5 registers would be issued: 1, 2, 2.
3920  DefCycle = RegNo / 2;
3921  if (DefCycle < 1)
3922  DefCycle = 1;
3923  // Result latency is issue cycle + 2: E2.
3924  DefCycle += 2;
3925  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3926  DefCycle = (RegNo / 2);
3927  // If there is an odd number of registers or the access is not 64-bit aligned,
3928  // it takes an extra AGU (Address Generation Unit) cycle.
3929  if ((RegNo % 2) || DefAlign < 8)
3930  ++DefCycle;
3931  // Result latency is AGU cycles + 2.
3932  DefCycle += 2;
3933  } else {
3934  // Assume the worst.
3935  DefCycle = RegNo + 2;
3936  }
3937 
3938  return DefCycle;
3939 }
3940 
3941 int
3942 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3943  const MCInstrDesc &UseMCID,
3944  unsigned UseClass,
3945  unsigned UseIdx, unsigned UseAlign) const {
3946  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3947  if (RegNo <= 0)
3948  return ItinData->getOperandCycle(UseClass, UseIdx);
3949 
3950  int UseCycle;
3951  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3952  // (regno / 2) + (regno % 2) + 1
3953  UseCycle = RegNo / 2 + 1;
3954  if (RegNo % 2)
3955  ++UseCycle;
3956  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3957  UseCycle = RegNo;
3958  bool isSStore = false;
3959 
3960  switch (UseMCID.getOpcode()) {
3961  default: break;
3962  case ARM::VSTMSIA:
3963  case ARM::VSTMSIA_UPD:
3964  case ARM::VSTMSDB_UPD:
3965  isSStore = true;
3966  break;
3967  }
3968 
3969  // If there is an odd number of 'S' registers or the access is not 64-bit
3970  // aligned, it takes an extra cycle.
3971  if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3972  ++UseCycle;
3973  } else {
3974  // Assume the worst.
3975  UseCycle = RegNo + 2;
3976  }
3977 
3978  return UseCycle;
3979 }
3980 
3981 int
3982 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3983  const MCInstrDesc &UseMCID,
3984  unsigned UseClass,
3985  unsigned UseIdx, unsigned UseAlign) const {
3986  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3987  if (RegNo <= 0)
3988  return ItinData->getOperandCycle(UseClass, UseIdx);
3989 
3990  int UseCycle;
3991  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3992  UseCycle = RegNo / 2;
3993  if (UseCycle < 2)
3994  UseCycle = 2;
3995  // Read in E3.
3996  UseCycle += 2;
3997  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3998  UseCycle = (RegNo / 2);
3999  // If there is an odd number of registers or the access is not 64-bit aligned,
4000  // it takes an extra AGU (Address Generation Unit) cycle.
4001  if ((RegNo % 2) || UseAlign < 8)
4002  ++UseCycle;
4003  } else {
4004  // Assume the worst.
4005  UseCycle = 1;
4006  }
4007  return UseCycle;
4008 }
4009 
4010 int
4011 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4012  const MCInstrDesc &DefMCID,
4013  unsigned DefIdx, unsigned DefAlign,
4014  const MCInstrDesc &UseMCID,
4015  unsigned UseIdx, unsigned UseAlign) const {
4016  unsigned DefClass = DefMCID.getSchedClass();
4017  unsigned UseClass = UseMCID.getSchedClass();
4018 
4019  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
4020  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
4021 
4022  // This may be a def / use of a variable_ops instruction; the operand
4023  // latency might be determinable dynamically. Let the target try to
4024  // figure it out.
4025  int DefCycle = -1;
4026  bool LdmBypass = false;
4027  switch (DefMCID.getOpcode()) {
4028  default:
4029  DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4030  break;
4031 
4032  case ARM::VLDMDIA:
4033  case ARM::VLDMDIA_UPD:
4034  case ARM::VLDMDDB_UPD:
4035  case ARM::VLDMSIA:
4036  case ARM::VLDMSIA_UPD:
4037  case ARM::VLDMSDB_UPD:
4038  DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4039  break;
4040 
4041  case ARM::LDMIA_RET:
4042  case ARM::LDMIA:
4043  case ARM::LDMDA:
4044  case ARM::LDMDB:
4045  case ARM::LDMIB:
4046  case ARM::LDMIA_UPD:
4047  case ARM::LDMDA_UPD:
4048  case ARM::LDMDB_UPD:
4049  case ARM::LDMIB_UPD:
4050  case ARM::tLDMIA:
4051  case ARM::tLDMIA_UPD:
4052  case ARM::tPUSH:
4053  case ARM::t2LDMIA_RET:
4054  case ARM::t2LDMIA:
4055  case ARM::t2LDMDB:
4056  case ARM::t2LDMIA_UPD:
4057  case ARM::t2LDMDB_UPD:
4058  LdmBypass = true;
4059  DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4060  break;
4061  }
4062 
4063  if (DefCycle == -1)
4064  // We can't seem to determine the result latency of the def, assume it's 2.
4065  DefCycle = 2;
4066 
4067  int UseCycle = -1;
4068  switch (UseMCID.getOpcode()) {
4069  default:
4070  UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
4071  break;
4072 
4073  case ARM::VSTMDIA:
4074  case ARM::VSTMDIA_UPD:
4075  case ARM::VSTMDDB_UPD:
4076  case ARM::VSTMSIA:
4077  case ARM::VSTMSIA_UPD:
4078  case ARM::VSTMSDB_UPD:
4079  UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4080  break;
4081 
4082  case ARM::STMIA:
4083  case ARM::STMDA:
4084  case ARM::STMDB:
4085  case ARM::STMIB:
4086  case ARM::STMIA_UPD:
4087  case ARM::STMDA_UPD:
4088  case ARM::STMDB_UPD:
4089  case ARM::STMIB_UPD:
4090  case ARM::tSTMIA_UPD:
4091  case ARM::tPOP_RET:
4092  case ARM::tPOP:
4093  case ARM::t2STMIA:
4094  case ARM::t2STMDB:
4095  case ARM::t2STMIA_UPD:
4096  case ARM::t2STMDB_UPD:
4097  UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4098  break;
4099  }
4100 
4101  if (UseCycle == -1)
4102  // Assume it's read in the first stage.
4103  UseCycle = 1;
4104 
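  // The operand latency is the distance from the cycle in which the def's
  // result becomes available to the cycle in which the use reads it; a
  // pipeline forwarding path (checked below) shaves one cycle off that.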
4105  UseCycle = DefCycle - UseCycle + 1;
4106  if (UseCycle > 0) {
4107  if (LdmBypass) {
4108  // It's a variable_ops instruction so we can't use DefIdx here. Just use
4109  // first def operand.
4110  if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
4111  UseClass, UseIdx))
4112  --UseCycle;
4113  } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
4114  UseClass, UseIdx)) {
4115  --UseCycle;
4116  }
4117  }
4118 
4119  return UseCycle;
4120 }
4121 
4122 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
4123  const MachineInstr *MI, unsigned Reg,
4124  unsigned &DefIdx, unsigned &Dist) {
4125  Dist = 0;
4126 
4127  MachineBasicBlock::const_iterator I = MI; ++I;
4128  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
4129  assert(II->isInsideBundle() && "Empty bundle?");
4130 
4131  int Idx = -1;
4132  while (II->isInsideBundle()) {
4133  Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
4134  if (Idx != -1)
4135  break;
4136  --II;
4137  ++Dist;
4138  }
4139 
4140  assert(Idx != -1 && "Cannot find bundled definition!");
4141  DefIdx = Idx;
4142  return &*II;
4143 }
4144 
4145 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4146  const MachineInstr &MI, unsigned Reg,
4147  unsigned &UseIdx, unsigned &Dist) {
4148  Dist = 0;
4149 
4150  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
4151  assert(II->isInsideBundle() && "Empty bundle?");
4152  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4153 
4154  // FIXME: This doesn't properly handle multiple uses.
4155  int Idx = -1;
4156  while (II != E && II->isInsideBundle()) {
4157  Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
4158  if (Idx != -1)
4159  break;
4160  if (II->getOpcode() != ARM::t2IT)
4161  ++Dist;
4162  ++II;
4163  }
4164 
4165  if (Idx == -1) {
4166  Dist = 0;
4167  return nullptr;
4168  }
4169 
4170  UseIdx = Idx;
4171  return &*II;
4172 }
4173 
4174 /// Return the number of cycles to add to (or subtract from) the static
4175 /// itinerary based on the def opcode and alignment. The caller will ensure that
4176 /// adjusted latency is at least one cycle.
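/// For example, on Swift an LDRrs with a simple [r + r] address is two cycles
/// cheaper, while an under-aligned VLDn on cores that check VLDn access
/// alignment costs one extra cycle.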
4177 static int adjustDefLatency(const ARMSubtarget &Subtarget,
4178  const MachineInstr &DefMI,
4179  const MCInstrDesc &DefMCID, unsigned DefAlign) {
4180  int Adjust = 0;
4181  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4182  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4183  // variants are one cycle cheaper.
4184  switch (DefMCID.getOpcode()) {
4185  default: break;
4186  case ARM::LDRrs:
4187  case ARM::LDRBrs: {
4188  unsigned ShOpVal = DefMI.getOperand(3).getImm();
4189  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4190  if (ShImm == 0 ||
4191  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4192  --Adjust;
4193  break;
4194  }
4195  case ARM::t2LDRs:
4196  case ARM::t2LDRBs:
4197  case ARM::t2LDRHs:
4198  case ARM::t2LDRSHs: {
4199  // Thumb2 mode: lsl only.
4200  unsigned ShAmt = DefMI.getOperand(3).getImm();
4201  if (ShAmt == 0 || ShAmt == 2)
4202  --Adjust;
4203  break;
4204  }
4205  }
4206  } else if (Subtarget.isSwift()) {
4207  // FIXME: Properly handle all of the latency adjustments for address
4208  // writeback.
4209  switch (DefMCID.getOpcode()) {
4210  default: break;
4211  case ARM::LDRrs:
4212  case ARM::LDRBrs: {
4213  unsigned ShOpVal = DefMI.getOperand(3).getImm();
4214  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4215  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4216  if (!isSub &&
4217  (ShImm == 0 ||
4218  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4219  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4220  Adjust -= 2;
4221  else if (!isSub &&
4222  ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4223  --Adjust;
4224  break;
4225  }
4226  case ARM::t2LDRs:
4227  case ARM::t2LDRBs:
4228  case ARM::t2LDRHs:
4229  case ARM::t2LDRSHs: {
4230  // Thumb2 mode: lsl only.
4231  unsigned ShAmt = DefMI.getOperand(3).getImm();
4232  if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4233  Adjust -= 2;
4234  break;
4235  }
4236  }
4237  }
4238 
4239  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4240  switch (DefMCID.getOpcode()) {
4241  default: break;
4242  case ARM::VLD1q8:
4243  case ARM::VLD1q16:
4244  case ARM::VLD1q32:
4245  case ARM::VLD1q64:
4246  case ARM::VLD1q8wb_fixed:
4247  case ARM::VLD1q16wb_fixed:
4248  case ARM::VLD1q32wb_fixed:
4249  case ARM::VLD1q64wb_fixed:
4250  case ARM::VLD1q8wb_register:
4251  case ARM::VLD1q16wb_register:
4252  case ARM::VLD1q32wb_register:
4253  case ARM::VLD1q64wb_register:
4254  case ARM::VLD2d8:
4255  case ARM::VLD2d16:
4256  case ARM::VLD2d32:
4257  case ARM::VLD2q8:
4258  case ARM::VLD2q16:
4259  case ARM::VLD2q32:
4260  case ARM::VLD2d8wb_fixed:
4261  case ARM::VLD2d16wb_fixed:
4262  case ARM::VLD2d32wb_fixed:
4263  case ARM::VLD2q8wb_fixed:
4264  case ARM::VLD2q16wb_fixed:
4265  case ARM::VLD2q32wb_fixed:
4266  case ARM::VLD2d8wb_register:
4267  case ARM::VLD2d16wb_register:
4268  case ARM::VLD2d32wb_register:
4269  case ARM::VLD2q8wb_register:
4270  case ARM::VLD2q16wb_register:
4271  case ARM::VLD2q32wb_register:
4272  case ARM::VLD3d8:
4273  case ARM::VLD3d16:
4274  case ARM::VLD3d32:
4275  case ARM::VLD1d64T:
4276  case ARM::VLD3d8_UPD:
4277  case ARM::VLD3d16_UPD:
4278  case ARM::VLD3d32_UPD:
4279  case ARM::VLD1d64Twb_fixed:
4280  case ARM::VLD1d64Twb_register:
4281  case ARM::VLD3q8_UPD:
4282  case ARM::VLD3q16_UPD:
4283  case ARM::VLD3q32_UPD:
4284  case ARM::VLD4d8:
4285  case ARM::VLD4d16:
4286  case ARM::VLD4d32:
4287  case ARM::VLD1d64Q:
4288  case ARM::VLD4d8_UPD:
4289  case ARM::VLD4d16_UPD:
4290  case ARM::VLD4d32_UPD:
4291  case ARM::VLD1d64Qwb_fixed:
4292  case ARM::VLD1d64Qwb_register:
4293  case ARM::VLD4q8_UPD:
4294  case ARM::VLD4q16_UPD:
4295  case ARM::VLD4q32_UPD:
4296  case ARM::VLD1DUPq8:
4297  case ARM::VLD1DUPq16:
4298  case ARM::VLD1DUPq32:
4299  case ARM::VLD1DUPq8wb_fixed:
4300  case ARM::VLD1DUPq16wb_fixed:
4301  case ARM::VLD1DUPq32wb_fixed:
4302  case ARM::VLD1DUPq8wb_register:
4303  case ARM::VLD1DUPq16wb_register:
4304  case ARM::VLD1DUPq32wb_register:
4305  case ARM::VLD2DUPd8:
4306  case ARM::VLD2DUPd16:
4307  case ARM::VLD2DUPd32:
4308  case ARM::VLD2DUPd8wb_fixed:
4309  case ARM::VLD2DUPd16wb_fixed:
4310  case ARM::VLD2DUPd32wb_fixed:
4311  case ARM::VLD2DUPd8wb_register:
4312  case ARM::VLD2DUPd16wb_register:
4313  case ARM::VLD2DUPd32wb_register:
4314  case ARM::VLD4DUPd8:
4315  case ARM::VLD4DUPd16:
4316  case ARM::VLD4DUPd32:
4317  case ARM::VLD4DUPd8_UPD:
4318  case ARM::VLD4DUPd16_UPD:
4319  case ARM::VLD4DUPd32_UPD:
4320  case ARM::VLD1LNd8:
4321  case ARM::VLD1LNd16:
4322  case ARM::VLD1LNd32:
4323  case ARM::VLD1LNd8_UPD:
4324  case ARM::VLD1LNd16_UPD:
4325  case ARM::VLD1LNd32_UPD:
4326  case ARM::VLD2LNd8:
4327  case ARM::VLD2LNd16:
4328  case ARM::VLD2LNd32:
4329  case ARM::VLD2LNq16:
4330  case ARM::VLD2LNq32:
4331  case ARM::VLD2LNd8_UPD:
4332  case ARM::VLD2LNd16_UPD:
4333  case ARM::VLD2LNd32_UPD:
4334  case ARM::VLD2LNq16_UPD:
4335  case ARM::VLD2LNq32_UPD:
4336  case ARM::VLD4LNd8:
4337  case ARM::VLD4LNd16:
4338  case ARM::VLD4LNd32:
4339  case ARM::VLD4LNq16:
4340  case ARM::VLD4LNq32:
4341  case ARM::VLD4LNd8_UPD:
4342  case ARM::VLD4LNd16_UPD:
4343  case ARM::VLD4LNd32_UPD:
4344  case ARM::VLD4LNq16_UPD:
4345  case ARM::VLD4LNq32_UPD:
4346  // If the address is not 64-bit aligned, the latencies of these
4347  // instructions increase by one.
4348  ++Adjust;
4349  break;
4350  }
4351  }
4352  return Adjust;
4353 }
4354 
4355 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4356  const MachineInstr &DefMI,
4357  unsigned DefIdx,
4358  const MachineInstr &UseMI,
4359  unsigned UseIdx) const {
4360  // No operand latency. The caller may fall back to getInstrLatency.
4361  if (!ItinData || ItinData->isEmpty())
4362  return -1;
4363 
4364  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4365  Register Reg = DefMO.getReg();
4366 
4367  const MachineInstr *ResolvedDefMI = &DefMI;
4368  unsigned DefAdj = 0;
4369  if (DefMI.isBundle())
4370  ResolvedDefMI =
4371  getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4372  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4373  ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4374  return 1;
4375  }
4376 
4377  const MachineInstr *ResolvedUseMI = &UseMI;
4378  unsigned UseAdj = 0;
4379  if (UseMI.isBundle()) {
4380  ResolvedUseMI =
4381  getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4382  if (!ResolvedUseMI)
4383  return -1;
4384  }
4385 
4386  return getOperandLatencyImpl(
4387  ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4388  Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4389 }
4390 
4391 int ARMBaseInstrInfo::getOperandLatencyImpl(
4392  const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4393  unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4394  const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4395  unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4396  if (Reg == ARM::CPSR) {
4397  if (DefMI.getOpcode() == ARM::FMSTAT) {
4398  // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4399  return Subtarget.isLikeA9() ? 1 : 20;
4400  }
4401 
4402  // CPSR set and branch can be paired in the same cycle.
4403  if (UseMI.isBranch())
4404  return 0;
4405 
4406  // Otherwise it takes the instruction latency (generally one).
4407  unsigned Latency = getInstrLatency(ItinData, DefMI);
4408 
4409  // For Thumb2 and -Os, prefer scheduling CPSR-setting instructions close to
4410  // their uses. Instructions scheduled between them may incur a code-size
4411  // penalty, as they prevent using the CPSR-setting 16-bit
4412  // instructions.
4413  if (Latency > 0 && Subtarget.isThumb2()) {
4414  const MachineFunction *MF = DefMI.getParent()->getParent();
4415  // FIXME: Use Function::hasOptSize().
4416  if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4417  --Latency;
4418  }
4419  return Latency;
4420  }
4421 
4422  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4423  return -1;
4424 
4425  unsigned DefAlign = DefMI.hasOneMemOperand()
4426  ? (*DefMI.memoperands_begin())->getAlign().value()
4427  : 0;
4428  unsigned UseAlign = UseMI.hasOneMemOperand()
4429  ? (*UseMI.memoperands_begin())->getAlign().value()
4430  : 0;
4431 
4432  // Get the itinerary's latency if possible, and handle variable_ops.
4433  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
4434  UseIdx, UseAlign);
4435  // Unable to find operand latency. The caller may resort to getInstrLatency.
4436  if (Latency < 0)
4437  return Latency;
4438 
4439  // Adjust for IT block position.
4440  int Adj = DefAdj + UseAdj;
4441 
4442  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4443  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4444  if (Adj >= 0 || (int)Latency > -Adj) {
4445  return Latency + Adj;
4446  }
4447  // Return the itinerary latency, which may be zero but not less than zero.
4448  return Latency;
4449 }
4450 
4451 int
4452 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4453  SDNode *DefNode, unsigned DefIdx,
4454  SDNode *UseNode, unsigned UseIdx) const {
4455  if (!DefNode->isMachineOpcode())
4456  return 1;
4457 
4458  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4459 
4460  if (isZeroCost(DefMCID.Opcode))
4461  return 0;
4462 
4463  if (!ItinData || ItinData->isEmpty())
4464  return DefMCID.mayLoad() ? 3 : 1;
4465 
4466  if (!UseNode->isMachineOpcode()) {
4467  int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4468  int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4469  int Threshold = 1 + Adj;
4470  return Latency <= Threshold ? 1 : Latency - Adj;
4471  }
4472 
4473  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4474  auto *DefMN = cast<MachineSDNode>(DefNode);
4475  unsigned DefAlign = !DefMN->memoperands_empty()
4476  ? (*DefMN->memoperands_begin())->getAlign().value()
4477  : 0;
4478  auto *UseMN = cast<MachineSDNode>(UseNode);
4479  unsigned UseAlign = !UseMN->memoperands_empty()
4480  ? (*UseMN->memoperands_begin())->getAlign().value()
4481  : 0;
4482  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4483  UseMCID, UseIdx, UseAlign);
4484 
4485  if (Latency > 1 &&
4486  (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4487  Subtarget.isCortexA7())) {
4488  // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4489  // variants are one cycle cheaper.
4490  switch (DefMCID.getOpcode()) {
4491  default: break;
4492  case ARM::LDRrs:
4493  case ARM::LDRBrs: {
4494  unsigned ShOpVal =
4495  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4496  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4497  if (ShImm == 0 ||
4498  (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4499  --Latency;
4500  break;
4501  }
4502  case ARM::t2LDRs:
4503  case ARM::t2LDRBs:
4504  case ARM::t2LDRHs:
4505  case ARM::t2LDRSHs: {
4506  // Thumb2 mode: lsl only.
4507  unsigned ShAmt =
4508  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4509  if (ShAmt == 0 || ShAmt == 2)
4510  --Latency;
4511  break;
4512  }
4513  }
4514  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4515  // FIXME: Properly handle all of the latency adjustments for address
4516  // writeback.
4517  switch (DefMCID.getOpcode()) {
4518  default: break;
4519  case ARM::LDRrs:
4520  case ARM::LDRBrs: {
4521  unsigned ShOpVal =
4522  cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4523  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4524  if (ShImm == 0 ||
4525  ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4526  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4527  Latency -= 2;
4528  else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4529  --Latency;
4530  break;
4531  }
4532  case ARM::t2LDRs:
4533  case ARM::t2LDRBs:
4534  case ARM::t2LDRHs:
4535  case ARM::t2LDRSHs:
4536  // Thumb2 mode: lsl 0-3 only.
4537  Latency -= 2;
4538  break;
4539  }
4540  }
4541 
4542  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4543  switch (DefMCID.getOpcode()) {
4544  default: break;
4545  case ARM::VLD1q8:
4546  case ARM::VLD1q16:
4547  case ARM::VLD1q32:
4548  case ARM::VLD1q64:
4549  case ARM::VLD1q8wb_register:
4550  case ARM::VLD1q16wb_register:
4551  case ARM::VLD1q32wb_register:
4552  case ARM::VLD1q64wb_register:
4553  case ARM::VLD1q8wb_fixed:
4554  case ARM::VLD1q16wb_fixed:
4555  case ARM::VLD1q32wb_fixed:
4556  case ARM::VLD1q64wb_fixed:
4557  case ARM::VLD2d8:
4558  case ARM::VLD2d16:
4559  case ARM::VLD2d32:
4560  case ARM::VLD2q8Pseudo:
4561  case ARM::VLD2q16Pseudo:
4562  case ARM::VLD2q32Pseudo:
4563  case ARM::VLD2d8wb_fixed:
4564  case ARM::VLD2d16wb_fixed:
4565  case ARM::VLD2d32wb_fixed:
4566  case ARM::VLD2q8PseudoWB_fixed:
4567  case ARM::VLD2q16PseudoWB_fixed:
4568  case ARM::VLD2q32PseudoWB_fixed:
4569  case ARM::VLD2d8wb_register:
4570  case ARM::VLD2d16wb_register:
4571  case ARM::VLD2d32wb_register:
4572  case ARM::VLD2q8PseudoWB_register:
4573  case ARM::VLD2q16PseudoWB_register:
4574  case ARM::VLD2q32PseudoWB_register:
4575  case ARM::VLD3d8Pseudo:
4576  case ARM::VLD3d16Pseudo:
4577  case ARM::VLD3d32Pseudo:
4578  case ARM::VLD1d8TPseudo:
4579  case ARM::VLD1d16TPseudo:
4580  case ARM::VLD1d32TPseudo:
4581  case ARM::VLD1d64TPseudo:
4582  case ARM::VLD1d64TPseudoWB_fixed:
4583  case ARM::VLD1d64TPseudoWB_register:
4584  case ARM::VLD3d8Pseudo_UPD:
4585  case ARM::VLD3d16Pseudo_UPD:
4586  case ARM::VLD3d32Pseudo_UPD:
4587  case ARM::VLD3q8Pseudo_UPD:
4588  case ARM::VLD3q16Pseudo_UPD:
4589  case ARM::VLD3q32Pseudo_UPD:
4590  case ARM::VLD3q8oddPseudo:
4591  case ARM::VLD3q16oddPseudo:
4592  case ARM::VLD3q32oddPseudo:
4593  case ARM::VLD3q8oddPseudo_UPD:
4594  case ARM::VLD3q16oddPseudo_UPD:
4595  case ARM::VLD3q32oddPseudo_UPD:
4596  case ARM::VLD4d8Pseudo:
4597  case ARM::VLD4d16Pseudo:
4598  case ARM::VLD4d32Pseudo:
4599  case ARM::VLD1d8QPseudo:
4600  case ARM::VLD1d16QPseudo:
4601  case ARM::VLD1d32QPseudo:
4602  case ARM::VLD1d64QPseudo:
4603  case ARM::VLD1d64QPseudoWB_fixed:
4604  case ARM::VLD1d64QPseudoWB_register:
4605  case ARM::VLD1q8HighQPseudo:
4606  case ARM::VLD1q8LowQPseudo_UPD:
4607  case ARM::VLD1q8HighTPseudo:
4608  case ARM::VLD1q8LowTPseudo_UPD:
4609  case ARM::VLD1q16HighQPseudo:
4610  case ARM::VLD1q16LowQPseudo_UPD:
4611  case ARM::VLD1q16HighTPseudo:
4612  case ARM::VLD1q16LowTPseudo_UPD:
4613  case ARM::VLD1q32HighQPseudo:
4614  case ARM::VLD1q32LowQPseudo_UPD:
4615  case ARM::VLD1q32HighTPseudo:
4616  case ARM::VLD1q32LowTPseudo_UPD:
4617  case ARM::VLD1q64HighQPseudo:
4618  case ARM::VLD1q64LowQPseudo_UPD:
4619  case ARM::VLD1q64HighTPseudo:
4620  case ARM::VLD1q64LowTPseudo_UPD:
4621  case ARM::VLD4d8Pseudo_UPD:
4622  case ARM::VLD4d16Pseudo_UPD:
4623  case ARM::VLD4d32Pseudo_UPD:
4624  case ARM::VLD4q8Pseudo_UPD:
4625  case ARM::VLD4q16Pseudo_UPD:
4626  case ARM::VLD4q32Pseudo_UPD:
4627  case ARM::VLD4q8oddPseudo:
4628  case ARM::VLD4q16oddPseudo:
4629  case ARM::VLD4q32oddPseudo:
4630  case ARM::VLD4q8oddPseudo_UPD:
4631  case ARM::VLD4q16oddPseudo_UPD:
4632  case ARM::VLD4q32oddPseudo_UPD:
4633  case ARM::VLD1DUPq8:
4634  case ARM::VLD1DUPq16:
4635  case ARM::VLD1DUPq32:
4636  case ARM::VLD1DUPq8wb_fixed:
4637  case ARM::VLD1DUPq16wb_fixed:
4638  case ARM::VLD1DUPq32wb_fixed:
4639  case ARM::VLD1DUPq8wb_register:
4640  case ARM::VLD1DUPq16wb_register:
4641  case ARM::VLD1DUPq32wb_register:
4642  case ARM::VLD2DUPd8:
4643  case ARM::VLD2DUPd16:
4644  case ARM::VLD2DUPd32:
4645  case ARM::VLD2DUPd8wb_fixed:
4646  case ARM::VLD2DUPd16wb_fixed:
4647  case ARM::VLD2DUPd32wb_fixed:
4648  case ARM::VLD2DUPd8wb_register:
4649  case ARM::VLD2DUPd16wb_register:
4650  case ARM::VLD2DUPd32wb_register:
4651  case ARM::VLD2DUPq8EvenPseudo:
4652  case ARM::VLD2DUPq8OddPseudo:
4653  case ARM::VLD2DUPq16EvenPseudo:
4654  case ARM::VLD2DUPq16OddPseudo:
4655  case ARM::VLD2DUPq32EvenPseudo:
4656  case ARM::VLD2DUPq32OddPseudo:
4657  case ARM::VLD3DUPq8EvenPseudo:
4658  case ARM::VLD3DUPq8OddPseudo:
4659  case ARM::VLD3DUPq16EvenPseudo:
4660  case ARM::VLD3DUPq16OddPseudo:
4661  case ARM::VLD3DUPq32EvenPseudo:
4662  case ARM::VLD3DUPq32OddPseudo:
4663  case ARM::VLD4DUPd8Pseudo:
4664  case ARM::VLD4DUPd16Pseudo:
4665  case ARM::VLD4DUPd32Pseudo:
4666  case ARM::VLD4DUPd8Pseudo_UPD:
4667  case ARM::VLD4DUPd16Pseudo_UPD:
4668  case ARM::VLD4DUPd32Pseudo_UPD:
4669  case ARM::VLD4DUPq8EvenPseudo:
4670  case ARM::VLD4DUPq8OddPseudo:
4671  case ARM::VLD4DUPq16EvenPseudo:
4672  case ARM::VLD4DUPq16OddPseudo:
4673  case ARM::VLD4DUPq32EvenPseudo:
4674  case ARM::VLD4DUPq32OddPseudo:
4675  case ARM::VLD1LNq8Pseudo:
4676  case ARM::VLD1LNq16Pseudo:
4677  case ARM::VLD1LNq32Pseudo:
4678  case ARM::VLD1LNq8Pseudo_UPD:
4679  case ARM::VLD1LNq16Pseudo_UPD:
4680  case ARM::VLD1LNq32Pseudo_UPD:
4681  case ARM::VLD2LNd8Pseudo:
4682  case ARM::VLD2LNd16Pseudo:
4683  case ARM::VLD2LNd32Pseudo:
4684  case ARM::VLD2LNq16Pseudo:
4685  case ARM::VLD2LNq32Pseudo:
4686  case ARM::VLD2LNd8Pseudo_UPD:
4687  case ARM::VLD2LNd16Pseudo_UPD:
4688  case ARM::VLD2LNd32Pseudo_UPD:
4689  case ARM::VLD2LNq16Pseudo_UPD:
4690  case ARM::VLD2LNq32Pseudo_UPD:
4691  case ARM::VLD4LNd8Pseudo:
4692  case ARM::VLD4LNd16Pseudo:
4693  case ARM::VLD4LNd32Pseudo:
4694  case ARM::VLD4LNq16Pseudo:
4695  case ARM::VLD4LNq32Pseudo:
4696  case ARM::VLD4LNd8Pseudo_UPD:
4697  case ARM::VLD4LNd16Pseudo_UPD:
4698  case ARM::VLD4LNd32Pseudo_UPD:
4699  case ARM::VLD4LNq16Pseudo_UPD:
4700  case ARM::VLD4LNq32Pseudo_UPD:
4701  // If the address is not 64-bit aligned, the latencies of these
4702  // instructions increase by one.
4703  ++Latency;
4704  break;
4705  }
4706 
4707  return Latency;
4708 }
4709 
4710 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4711  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4712  MI.isImplicitDef())
4713  return 0;
4714 
4715  if (MI.isBundle())
4716  return 0;
4717 
4718  const MCInstrDesc &MCID = MI.getDesc();
4719 
4720  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4721  !Subtarget.cheapPredicableCPSRDef())) {
4722  // When predicated, CPSR is an additional source operand for CPSR-updating
4723  // instructions; this apparently increases their latencies.
4724  return 1;
4725  }
4726  return 0;
4727 }
4728 
4729 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4730  const MachineInstr &MI,
4731  unsigned *PredCost) const {
4732  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4733  MI.isImplicitDef())
4734  return 1;
4735 
4736  // An instruction scheduler typically runs on unbundled instructions; however,
4737  // other passes may query the latency of a bundled instruction.
4738  if (MI.isBundle()) {
4739  unsigned Latency = 0;
4740  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4741  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4742  while (++I != E && I->isInsideBundle()) {
4743  if (I->getOpcode() != ARM::t2IT)
4744  Latency += getInstrLatency(ItinData, *I, PredCost);
4745  }
4746  return Latency;
4747  }
4748 
4749  const MCInstrDesc &MCID = MI.getDesc();
4750  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4751  !Subtarget.cheapPredicableCPSRDef()))) {
4752  // When predicated, CPSR is an additional source operand for CPSR-updating
4753  // instructions; this apparently increases their latencies.
4754  *PredCost = 1;
4755  }
4756  // Be sure to call getStageLatency for an empty itinerary in case it has a
4757  // valid MinLatency property.
4758  if (!ItinData)
4759  return MI.mayLoad() ? 3 : 1;
4760 
4761  unsigned Class = MCID.getSchedClass();
4762 
4763  // For instructions with variable uops, use uops as latency.
4764  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4765  return getNumMicroOps(ItinData, MI);
4766 
4767  // For the common case, fall back on the itinerary's latency.
4768  unsigned Latency = ItinData->getStageLatency(Class);
4769 
4770  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4771  unsigned DefAlign =
4772  MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4773  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4774  if (Adj >= 0 || (int)Latency > -Adj) {
4775  return Latency + Adj;
4776  }
4777  return Latency;
4778 }
4779 
4780 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4781  SDNode *Node) const {
4782  if (!Node->isMachineOpcode())
4783  return 1;
4784 
4785  if (!ItinData || ItinData->isEmpty())
4786  return 1;
4787 
4788  unsigned Opcode = Node->getMachineOpcode();
4789  switch (Opcode) {
4790  default:
4791  return ItinData->getStageLatency(get(Opcode).getSchedClass());
4792  case ARM::VLDMQIA:
4793  case ARM::VSTMQIA:
4794  return 2;
4795  }
4796 }
4797 
4798 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4799  const MachineRegisterInfo *MRI,
4800  const MachineInstr &DefMI,
4801  unsigned DefIdx,
4802  const MachineInstr &UseMI,
4803  unsigned UseIdx) const {
4804  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4805  unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4806  if (Subtarget.nonpipelinedVFP() &&
4807  (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4808  return true;
4809 
4810  // Hoist VFP / NEON instructions with 4 or higher latency.
4811  unsigned Latency =
4812  SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4813  if (Latency <= 3)
4814  return false;
4815  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4816  UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4817 }
4818 
4819 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4820  const MachineInstr &DefMI,
4821  unsigned DefIdx) const {
4822  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4823  if (!ItinData || ItinData->isEmpty())
4824  return false;
4825 
4826  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4827  if (DDomain == ARMII::DomainGeneral) {
4828  unsigned DefClass = DefMI.getDesc().getSchedClass();
4829  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4830  return (DefCycle != -1 && DefCycle <= 2);
4831  }
4832  return false;
4833 }
4834 
4835 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4836  StringRef &ErrInfo) const {
4837  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4838  ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4839  return false;
4840  }
4841  if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4842  // Make sure we don't generate a lo-lo mov that isn't supported.
4843  if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4844  !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4845  ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4846  return false;
4847  }
4848  }
4849  if (MI.getOpcode() == ARM::tPUSH ||
4850  MI.getOpcode() == ARM::tPOP ||
4851  MI.getOpcode() == ARM::tPOP_RET) {
4852  for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4853  if (MO.isImplicit() || !MO.isReg())
4854  continue;
4855  Register Reg = MO.getReg();
4856  if (Reg < ARM::R0 || Reg > ARM::R7) {
4857  if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4858  !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4859  ErrInfo = "Unsupported register in Thumb1 push/pop";
4860  return false;
4861  }
4862  }
4863  }
4864  }
4865  if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4866  assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4867  if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4868  MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4869  ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4870  return false;
4871  }
4872  }
4873 
4874  // Check the address model by taking the first Imm operand and checking it is
4875  // legal for that addressing mode.
4876  ARMII::AddrMode AddrMode =
4877  (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4878  switch (AddrMode) {
4879  default:
4880  break;
4881  case ARMII::AddrModeT2_i7:
4882  case ARMII::AddrModeT2_i7s2:
4883  case ARMII::AddrModeT2_i7s4:
4884  case ARMII::AddrModeT2_i8:
4885  case ARMII::AddrModeT2_i8pos:
4886  case ARMII::AddrModeT2_i8neg:
4887  case ARMII::AddrModeT2_i8s4:
4888  case ARMII::AddrModeT2_i12: {
4889  uint32_t Imm = 0;
4890  for (auto Op : MI.operands()) {
4891  if (Op.isImm()) {
4892  Imm = Op.getImm();
4893  break;
4894  }
4895  }
4896  if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4897  ErrInfo = "Incorrect AddrMode Imm for instruction";
4898  return false;
4899  }
4900  break;
4901  }
4902  }
4903  return true;
4904 }
4905 
4906 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4907  unsigned LoadImmOpc,
4908  unsigned LoadOpc) const {
4909  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4910  "ROPI/RWPI not currently supported with stack guard");
4911 
4912  MachineBasicBlock &MBB = *MI->getParent();
4913  DebugLoc DL = MI->getDebugLoc();
4914  Register Reg = MI->getOperand(0).getReg();
4915  MachineInstrBuilder MIB;
4916  unsigned int Offset = 0;
4917 
4918  if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4919  assert(Subtarget.isReadTPHard() &&
4920  "TLS stack protector requires hardware TLS register");
4921 
4922  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4923  .addImm(15)
4924  .addImm(0)
4925  .addImm(13)
4926  .addImm(0)
4927  .addImm(3)
4928  .add(predOps(ARMCC::AL));
4929 
4930  Module &M = *MBB.getParent()->getFunction().getParent();
4931  Offset = M.getStackProtectorGuardOffset();
4932  if (Offset & ~0xfffU) {
4933  // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4934  // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4935  // bits, resulting in a range of 0 to +1 MiB for the guard offset.
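  // For example, a guard offset of 0x1234 is emitted as an ADD of #0x1000
  // followed by an LDR with immediate offset #0x234.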
4936  unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4937  BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4938  .addReg(Reg, RegState::Kill)
4939  .addImm(Offset & ~0xfffU)
4940  .add(predOps(ARMCC::AL))
4941  .addReg(0);
4942  Offset &= 0xfffU;
4943  }
4944  } else {
4945  const GlobalValue *GV =
4946  cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4947  bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4948 
4949  unsigned TargetFlags = ARMII::MO_NO_FLAG;
4950  if (Subtarget.isTargetMachO()) {
4951  TargetFlags |= ARMII::MO_NONLAZY;
4952  } else if (Subtarget.isTargetCOFF()) {
4953  if (GV->hasDLLImportStorageClass())
4954  TargetFlags |= ARMII::MO_DLLIMPORT;
4955  else if (IsIndirect)
4956  TargetFlags |= ARMII::MO_COFFSTUB;
4957  } else if (Subtarget.isGVInGOT(GV)) {
4958  TargetFlags |= ARMII::MO_GOT;
4959  }
4960 
4961  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4962  .addGlobalAddress(GV, 0, TargetFlags);
4963 
4964  if (IsIndirect) {
4965  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4966  MIB.addReg(Reg, RegState::Kill).addImm(0);
4967  auto Flags = MachineMemOperand::MOLoad |
4968  MachineMemOperand::MODereferenceable |
4969  MachineMemOperand::MOInvariant;
4970  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4971  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4972  MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4973  }
4974  }
4975 
4976  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4977  MIB.addReg(Reg, RegState::Kill)
4978  .addImm(Offset)
4979  .cloneMemRefs(*MI)
4980  .add(predOps(ARMCC::AL));
4981 }
4982 
4983 bool
4984 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4985  unsigned &AddSubOpc,
4986  bool &NegAcc, bool &HasLane) const {
4987  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4988  if (I == MLxEntryMap.end())
4989  return false;
4990 
4991  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4992  MulOpc = Entry.MulOpc;
4993  AddSubOpc = Entry.AddSubOpc;
4994  NegAcc = Entry.NegAcc;
4995  HasLane = Entry.HasLane;
4996  return true;
4997 }
4998 
4999 //===----------------------------------------------------------------------===//
5000 // Execution domains.
5001 //===----------------------------------------------------------------------===//
5002 //
5003 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
5004 // and some can go down both. The vmov instructions go down the VFP pipeline,
5005 // but they can be changed to vorr equivalents that are executed by the NEON
5006 // pipeline.
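// For example, an unpredicated vmov.f64 d0, d1 (VMOVD) can instead be encoded
// as the NEON vorr d0, d1, d1, which performs the same move.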
5007 //
5008 // We use the following execution domain numbering:
5009 //
5010 enum ARMExeDomain {
5011  ExeGeneric = 0,
5012  ExeVFP = 1,
5013  ExeNEON = 2
5014 };
5015 
5016 //
5017 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
5018 //
5019 std::pair<uint16_t, uint16_t>
5020 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
5021  // If we don't have access to NEON instructions then we won't be able
5022  // to swizzle anything to the NEON domain. Check to make sure.
5023  if (Subtarget.hasNEON()) {
5024  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
5025  // if they are not predicated.
5026  if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
5027  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5028 
5029  // CortexA9 is particularly picky about mixing the two and wants these
5030  // converted.
5031  if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
5032  (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
5033  MI.getOpcode() == ARM::VMOVS))
5034  return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5035  }
5036  // No other instructions can be swizzled, so just determine their domain.
5037  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
5038 
5039  if (Domain & ARMII::DomainNEON)
5040  return std::make_pair(ExeNEON, 0);
5041 
5042  // Certain instructions can go either way on Cortex-A8.
5043  // Treat them as NEON instructions.
5044  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
5045  return std::make_pair(ExeNEON, 0);
5046 
5047  if (Domain & ARMII::DomainVFP)
5048  return std::make_pair(ExeVFP, 0);
5049 
5050  return std::make_pair(ExeGeneric, 0);
5051 }
5052 
5053 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
5054  unsigned SReg, unsigned &Lane) {
5055  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
5056  Lane = 0;
5057 
5058  if (DReg != ARM::NoRegister)
5059  return DReg;
5060 
5061  Lane = 1;
5062  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
5063 
5064  assert(DReg && "S-register with no D super-register?");
5065  return DReg;
5066 }
5067 
5068 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
5069 /// set ImplicitSReg to a register number that must be marked as implicit-use or
5070 /// zero if no register needs to be defined as implicit-use.
5071 ///
5072 /// If the function cannot determine if an SPR should be marked implicit use or
5073 /// not, it returns false.
5074 ///
5075 /// This function handles cases where an instruction is being modified from taking
5076 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
5077 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
5078 /// lane of the DPR).
5079 ///
5080 /// If the other SPR is defined, an implicit-use of it should be added. Else,
5081 /// (including the case where the DPR itself is defined), it should not.
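/// For example, when a use of s1 is rewritten into a use of d0[1] and s0 has an
/// earlier def, an implicit-use of s0 keeps that partial def ordered before the
/// new D-register read.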
5082 ///
5083 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
5084  MachineInstr &MI, unsigned DReg,
5085  unsigned Lane, unsigned &ImplicitSReg) {
5086  // If the DPR is defined or used already, the other SPR lane will be chained
5087  // correctly, so there is nothing to be done.
5088  if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
5089  ImplicitSReg = 0;
5090  return true;
5091  }
5092 
5093  // Otherwise we need to go searching to see if the SPR is set explicitly.
5094  ImplicitSReg = TRI->getSubReg(DReg,
5095  (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
5096  MachineBasicBlock::LivenessQueryResult LQR =
5097  MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
5098 
5099  if (LQR == MachineBasicBlock::LQR_Live)