1//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the Base ARM implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMBaseInstrInfo.h"
14#include "ARMBaseRegisterInfo.h"
16#include "ARMFeatures.h"
17#include "ARMHazardRecognizer.h"
19#include "ARMSubtarget.h"
22#include "MVETailPredUtils.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallSet.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/Module.h"
53#include "llvm/MC/MCAsmInfo.h"
54#include "llvm/MC/MCInstrDesc.h"
60#include "llvm/Support/Debug.h"
65#include <algorithm>
66#include <cassert>
67#include <cstdint>
68#include <iterator>
69#include <new>
70#include <utility>
71#include <vector>
72
73using namespace llvm;
74
75#define DEBUG_TYPE "arm-instrinfo"
76
77#define GET_INSTRINFO_CTOR_DTOR
78#include "ARMGenInstrInfo.inc"
79
80static cl::opt<bool>
81EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
82 cl::desc("Enable ARM 2-addr to 3-addr conv"));
83
84/// ARM_MLxEntry - Record information about MLA / MLS instructions.
85struct ARM_MLxEntry {
86 uint16_t MLxOpc; // MLA / MLS opcode
87 uint16_t MulOpc; // Expanded multiplication opcode
88 uint16_t AddSubOpc; // Expanded add / sub opcode
89 bool NegAcc; // True if the acc is negated before the add / sub.
90 bool HasLane; // True if instruction has an extra "lane" operand.
91};
92
93static const ARM_MLxEntry ARM_MLxTable[] = {
94 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
95 // fp scalar ops
96 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
97 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
98 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
99 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
100 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
101 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
102 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
103 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
104
105 // fp SIMD ops
106 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
107 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
108 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
109 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
110 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
111 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
112 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
113 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
114};
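// For illustration (an editorial sketch, not part of the upstream source):
// when an MLA/MLS-style instruction needs to be broken up, for example by the
// MLx expansion logic, the first entry above records that a scalar VMLAS
//   %d = VMLAS %acc, %a, %b      ; d = acc + a * b
// can be split into the pair
//   %p = VMULS %a, %b
//   %d = VADDS %acc, %p
// with NegAcc selecting a subtracted accumulator and HasLane marking the
// indexed SIMD forms that carry an extra lane operand.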
115
116ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
117 : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
118 Subtarget(STI) {
119 for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
120 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
121 llvm_unreachable("Duplicated entries?");
122 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
123 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
124 }
125}
126
127// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
128// currently defaults to no prepass hazard recognizer.
129ScheduleHazardRecognizer *
130ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
131 const ScheduleDAG *DAG) const {
132 if (usePreRAHazardRecognizer()) {
133 const InstrItineraryData *II =
134 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
135 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
136 }
137 return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
138}
139
140// Called during:
141// - pre-RA scheduling
142// - post-RA scheduling when FeatureUseMISched is set
143ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
144 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
145 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
146
147 // We would like to restrict this hazard recognizer to only
148 // post-RA scheduling; we can tell that we're post-RA because we don't
149 // track VRegLiveness.
150 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
151 // banks banked on bit 2. Assume that TCMs are in use.
152 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
153 MHR->AddHazardRecognizer(
154 std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
155
156 // Not inserting ARMHazardRecognizerFPMLx because that would change
157 // legacy behavior
158
159 auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
160 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
161 return MHR;
162}
163
164// Called during post-RA scheduling when FeatureUseMISched is not set
165ScheduleHazardRecognizer *ARMBaseInstrInfo::
166CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
167 const ScheduleDAG *DAG) const {
168 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
169
170 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
171 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
172
173 auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
174 if (BHR)
175 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
176 return MHR;
177}
178
179MachineInstr *
180ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
181 LiveIntervals *LIS) const {
182 // FIXME: Thumb2 support.
183
184 if (!EnableARM3Addr)
185 return nullptr;
186
187 MachineFunction &MF = *MI.getParent()->getParent();
188 uint64_t TSFlags = MI.getDesc().TSFlags;
189 bool isPre = false;
190 switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
191 default: return nullptr;
192 case ARMII::IndexModePre:
193 isPre = true;
194 break;
195 case ARMII::IndexModePost:
196 break;
197 }
198
199 // Try splitting an indexed load/store to an un-indexed one plus an add/sub
200 // operation.
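 // For illustration (an editorial sketch, not part of the upstream source):
 // a pre-indexed load such as
 //   LDR_PRE %r0, %r1<wb> = [%r1, #4]!
 // would ideally become the two-instruction sequence
 //   ADDri %r1<wb> = %r1, #4
 //   LDR %r0 = [%r1<wb>, #0]
 // as long as the offset still fits a single add/sub encoding.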
201 unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
202 if (MemOpc == 0)
203 return nullptr;
204
205 MachineInstr *UpdateMI = nullptr;
206 MachineInstr *MemMI = nullptr;
207 unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
208 const MCInstrDesc &MCID = MI.getDesc();
209 unsigned NumOps = MCID.getNumOperands();
210 bool isLoad = !MI.mayStore();
211 const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
212 const MachineOperand &Base = MI.getOperand(2);
213 const MachineOperand &Offset = MI.getOperand(NumOps - 3);
214 Register WBReg = WB.getReg();
215 Register BaseReg = Base.getReg();
216 Register OffReg = Offset.getReg();
217 unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
218 ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
219 switch (AddrMode) {
220 default: llvm_unreachable("Unknown indexed op!");
221 case ARMII::AddrMode2: {
222 bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
223 unsigned Amt = ARM_AM::getAM2Offset(OffImm);
224 if (OffReg == 0) {
225 if (ARM_AM::getSOImmVal(Amt) == -1)
226 // Can't encode it in a so_imm operand. This transformation will
227 // add more than 1 instruction. Abandon!
228 return nullptr;
229 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
230 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
231 .addReg(BaseReg)
232 .addImm(Amt)
233 .add(predOps(Pred))
234 .add(condCodeOp());
235 } else if (Amt != 0) {
236 ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
237 unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
238 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
239 get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
240 .addReg(BaseReg)
241 .addReg(OffReg)
242 .addReg(0)
243 .addImm(SOOpc)
244 .add(predOps(Pred))
245 .add(condCodeOp());
246 } else
247 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
248 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
249 .addReg(BaseReg)
250 .addReg(OffReg)
251 .add(predOps(Pred))
252 .add(condCodeOp());
253 break;
254 }
255 case ARMII::AddrMode3 : {
256 bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
257 unsigned Amt = ARM_AM::getAM3Offset(OffImm);
258 if (OffReg == 0)
259 // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
260 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
261 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
262 .addReg(BaseReg)
263 .addImm(Amt)
264 .add(predOps(Pred))
265 .add(condCodeOp());
266 else
267 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
268 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
269 .addReg(BaseReg)
270 .addReg(OffReg)
271 .add(predOps(Pred))
272 .add(condCodeOp());
273 break;
274 }
275 }
276
277 std::vector<MachineInstr*> NewMIs;
278 if (isPre) {
279 if (isLoad)
280 MemMI =
281 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
282 .addReg(WBReg)
283 .addImm(0)
284 .addImm(Pred);
285 else
286 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
287 .addReg(MI.getOperand(1).getReg())
288 .addReg(WBReg)
289 .addReg(0)
290 .addImm(0)
291 .addImm(Pred);
292 NewMIs.push_back(MemMI);
293 NewMIs.push_back(UpdateMI);
294 } else {
295 if (isLoad)
296 MemMI =
297 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
298 .addReg(BaseReg)
299 .addImm(0)
300 .addImm(Pred);
301 else
302 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
303 .addReg(MI.getOperand(1).getReg())
304 .addReg(BaseReg)
305 .addReg(0)
306 .addImm(0)
307 .addImm(Pred);
308 if (WB.isDead())
309 UpdateMI->getOperand(0).setIsDead();
310 NewMIs.push_back(UpdateMI);
311 NewMIs.push_back(MemMI);
312 }
313
314 // Transfer LiveVariables states, kill / dead info.
315 if (LV) {
316 for (const MachineOperand &MO : MI.operands()) {
317 if (MO.isReg() && MO.getReg().isVirtual()) {
318 Register Reg = MO.getReg();
319
320 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
321 if (MO.isDef()) {
322 MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
323 if (MO.isDead())
324 LV->addVirtualRegisterDead(Reg, *NewMI);
325 }
326 if (MO.isUse() && MO.isKill()) {
327 for (unsigned j = 0; j < 2; ++j) {
328 // Look at the two new MI's in reverse order.
329 MachineInstr *NewMI = NewMIs[j];
330 if (!NewMI->readsRegister(Reg, /*TRI=*/nullptr))
331 continue;
332 LV->addVirtualRegisterKilled(Reg, *NewMI);
333 if (VI.removeKill(MI))
334 VI.Kills.push_back(NewMI);
335 break;
336 }
337 }
338 }
339 }
340 }
341
342 MachineBasicBlock &MBB = *MI.getParent();
343 MBB.insert(MI, NewMIs[1]);
344 MBB.insert(MI, NewMIs[0]);
345 return NewMIs[0];
346}
347
348// Branch analysis.
349// Cond vector output format:
350// 0 elements indicates an unconditional branch
351// 2 elements indicates a conditional branch; the elements are
352// the condition to check and the CPSR.
353// 3 elements indicates a hardware loop end; the elements
354// are the opcode, the operand value to test, and a dummy
355// operand used to pad out to 3 operands.
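// For illustration (an editorial sketch, not part of the upstream source):
// a conditional branch "Bcc %bb.2, 1 /* CC::ne */, $cpsr" would be reported
// with Cond = { ARMCC::NE immediate, CPSR register }, whereas a t2LoopEnd
// hardware loop end is reported as { opcode, counter operand, 0 }.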
356bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
357 MachineBasicBlock *&TBB,
358 MachineBasicBlock *&FBB,
359 SmallVectorImpl<MachineOperand> &Cond,
360 bool AllowModify) const {
361 TBB = nullptr;
362 FBB = nullptr;
363
364 MachineBasicBlock::instr_iterator I = MBB.instr_end();
365 if (I == MBB.instr_begin())
366 return false; // Empty blocks are easy.
367 --I;
368
369 // Walk backwards from the end of the basic block until the branch is
370 // analyzed or we give up.
371 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
372 // Flag to be raised on unanalyzeable instructions. This is useful in cases
373 // where we want to clean up on the end of the basic block before we bail
374 // out.
375 bool CantAnalyze = false;
376
377 // Skip over DEBUG values, predicated nonterminators and speculation
378 // barrier terminators.
379 while (I->isDebugInstr() || !I->isTerminator() ||
380 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
381 I->getOpcode() == ARM::t2DoLoopStartTP){
382 if (I == MBB.instr_begin())
383 return false;
384 --I;
385 }
386
387 if (isIndirectBranchOpcode(I->getOpcode()) ||
388 isJumpTableBranchOpcode(I->getOpcode())) {
389 // Indirect branches and jump tables can't be analyzed, but we still want
390 // to clean up any instructions at the tail of the basic block.
391 CantAnalyze = true;
392 } else if (isUncondBranchOpcode(I->getOpcode())) {
393 TBB = I->getOperand(0).getMBB();
394 } else if (isCondBranchOpcode(I->getOpcode())) {
395 // Bail out if we encounter multiple conditional branches.
396 if (!Cond.empty())
397 return true;
398
399 assert(!FBB && "FBB should have been null.");
400 FBB = TBB;
401 TBB = I->getOperand(0).getMBB();
402 Cond.push_back(I->getOperand(1));
403 Cond.push_back(I->getOperand(2));
404 } else if (I->isReturn()) {
405 // Returns can't be analyzed, but we should run cleanup.
406 CantAnalyze = true;
407 } else if (I->getOpcode() == ARM::t2LoopEnd &&
408 MBB.getParent()
409 ->getSubtarget<ARMSubtarget>()
410 .enableMachinePipeliner()) {
411 if (!Cond.empty())
412 return true;
413 FBB = TBB;
414 TBB = I->getOperand(1).getMBB();
415 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
416 Cond.push_back(I->getOperand(0));
417 Cond.push_back(MachineOperand::CreateImm(0));
418 } else {
419 // We encountered an unrecognized terminator. Bail out immediately.
420 return true;
421 }
422
423 // Cleanup code - to be run for unpredicated unconditional branches and
424 // returns.
425 if (!isPredicated(*I) &&
426 (isUncondBranchOpcode(I->getOpcode()) ||
427 isIndirectBranchOpcode(I->getOpcode()) ||
428 isJumpTableBranchOpcode(I->getOpcode()) ||
429 I->isReturn())) {
430 // Forget any previous condition branch information - it no longer applies.
431 Cond.clear();
432 FBB = nullptr;
433
434 // If we can modify the function, delete everything below this
435 // unconditional branch.
436 if (AllowModify) {
437 MachineBasicBlock::iterator DI = std::next(I);
438 while (DI != MBB.instr_end()) {
439 MachineInstr &InstToDelete = *DI;
440 ++DI;
441 // Speculation barriers must not be deleted.
442 if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
443 continue;
444 InstToDelete.eraseFromParent();
445 }
446 }
447 }
448
449 if (CantAnalyze) {
450 // We may not be able to analyze the block, but we could still have
451 // an unconditional branch as the last instruction in the block, which
452 // just branches to layout successor. If this is the case, then just
453 // remove it if we're allowed to make modifications.
454 if (AllowModify && !isPredicated(MBB.back()) &&
455 isUncondBranchOpcode(MBB.back().getOpcode()) &&
456 TBB && MBB.isLayoutSuccessor(TBB))
457 removeBranch(MBB);
458 return true;
459 }
460
461 if (I == MBB.instr_begin())
462 return false;
463
464 --I;
465 }
466
467 // We made it past the terminators without bailing out - we must have
468 // analyzed this branch successfully.
469 return false;
470}
471
472unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
473 int *BytesRemoved) const {
474 assert(!BytesRemoved && "code size not handled");
475
476 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
477 if (I == MBB.end())
478 return 0;
479
480 if (!isUncondBranchOpcode(I->getOpcode()) &&
481 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
482 return 0;
483
484 // Remove the branch.
485 I->eraseFromParent();
486
487 I = MBB.end();
488
489 if (I == MBB.begin()) return 1;
490 --I;
491 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
492 return 1;
493
494 // Remove the branch.
495 I->eraseFromParent();
496 return 2;
497}
498
499unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
500 MachineBasicBlock *TBB,
501 MachineBasicBlock *FBB,
502 ArrayRef<MachineOperand> Cond,
503 const DebugLoc &DL,
504 int *BytesAdded) const {
505 assert(!BytesAdded && "code size not handled");
506 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
507 int BOpc = !AFI->isThumbFunction()
508 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
509 int BccOpc = !AFI->isThumbFunction()
510 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
511 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
512
513 // Shouldn't be a fall through.
514 assert(TBB && "insertBranch must not be told to insert a fallthrough");
515 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
516 "ARM branch conditions have two or three components!");
517
518 // For conditional branches, we use addOperand to preserve CPSR flags.
519
520 if (!FBB) {
521 if (Cond.empty()) { // Unconditional branch?
522 if (isThumb)
523 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
524 else
525 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
526 } else if (Cond.size() == 2) {
527 BuildMI(&MBB, DL, get(BccOpc))
528 .addMBB(TBB)
529 .addImm(Cond[0].getImm())
530 .add(Cond[1]);
531 } else
532 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
533 return 1;
534 }
535
536 // Two-way conditional branch.
537 if (Cond.size() == 2)
538 BuildMI(&MBB, DL, get(BccOpc))
539 .addMBB(TBB)
540 .addImm(Cond[0].getImm())
541 .add(Cond[1]);
542 else if (Cond.size() == 3)
543 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
544 if (isThumb)
545 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
546 else
547 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
548 return 2;
549}
550
551bool ARMBaseInstrInfo::
552reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
553 if (Cond.size() == 2) {
554 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
555 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
556 return false;
557 }
558 return true;
559}
560
561bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
562 if (MI.isBundle()) {
563 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
564 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
565 while (++I != E && I->isInsideBundle()) {
566 int PIdx = I->findFirstPredOperandIdx();
567 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
568 return true;
569 }
570 return false;
571 }
572
573 int PIdx = MI.findFirstPredOperandIdx();
574 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
575}
576
577std::string ARMBaseInstrInfo::createMIROperandComment(
578 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
579 const TargetRegisterInfo *TRI) const {
580
581 // First, let's see if there is a generic comment for this operand
582 std::string GenericComment =
583 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
584 if (!GenericComment.empty())
585 return GenericComment;
586
587 // If not, check if we have an immediate operand.
588 if (!Op.isImm())
589 return std::string();
590
591 // And print its corresponding condition code if the immediate is a
592 // predicate.
593 int FirstPredOp = MI.findFirstPredOperandIdx();
594 if (FirstPredOp != (int) OpIdx)
595 return std::string();
596
597 std::string CC = "CC::";
598 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
599 return CC;
600}
601
602bool ARMBaseInstrInfo::PredicateInstruction(
603 MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
604 unsigned Opc = MI.getOpcode();
605 if (isUncondBranchOpcode(Opc)) {
606 MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
607 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
608 .addImm(Pred[0].getImm())
609 .addReg(Pred[1].getReg());
610 return true;
611 }
612
613 int PIdx = MI.findFirstPredOperandIdx();
614 if (PIdx != -1) {
615 MachineOperand &PMO = MI.getOperand(PIdx);
616 PMO.setImm(Pred[0].getImm());
617 MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
618
619 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
620 // IT block. This affects how they are printed.
621 const MCInstrDesc &MCID = MI.getDesc();
622 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
623 assert(MCID.operands()[1].isOptionalDef() &&
624 "CPSR def isn't expected operand");
625 assert((MI.getOperand(1).isDead() ||
626 MI.getOperand(1).getReg() != ARM::CPSR) &&
627 "if conversion tried to stop defining used CPSR");
628 MI.getOperand(1).setReg(ARM::NoRegister);
629 }
630
631 return true;
632 }
633 return false;
634}
635
636bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
637 ArrayRef<MachineOperand> Pred2) const {
638 if (Pred1.size() > 2 || Pred2.size() > 2)
639 return false;
640
641 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
642 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
643 if (CC1 == CC2)
644 return true;
645
646 switch (CC1) {
647 default:
648 return false;
649 case ARMCC::AL:
650 return true;
651 case ARMCC::HS:
652 return CC2 == ARMCC::HI;
653 case ARMCC::LS:
654 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
655 case ARMCC::GE:
656 return CC2 == ARMCC::GT;
657 case ARMCC::LE:
658 return CC2 == ARMCC::LT;
659 }
660}
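// For illustration (an editorial note, not part of the upstream source):
// SubsumesPredicate answers "does Pred1 cover Pred2?", e.g. GE subsumes GT
// because anything executed under GT would also execute under GE, and AL
// subsumes every condition; unrelated pairs such as EQ vs. MI yield false.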
661
662bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
663 std::vector<MachineOperand> &Pred,
664 bool SkipDead) const {
665 bool Found = false;
666 for (const MachineOperand &MO : MI.operands()) {
667 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
668 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
669 if (ClobbersCPSR || IsCPSR) {
670
671 // Filter out T1 instructions that have a dead CPSR,
672 // allowing IT blocks to be generated containing T1 instructions
673 const MCInstrDesc &MCID = MI.getDesc();
674 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
675 SkipDead)
676 continue;
677
678 Pred.push_back(MO);
679 Found = true;
680 }
681 }
682
683 return Found;
684}
685
686bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
687 for (const auto &MO : MI.operands())
688 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
689 return true;
690 return false;
691}
692
693static bool isEligibleForITBlock(const MachineInstr *MI) {
694 switch (MI->getOpcode()) {
695 default: return true;
696 case ARM::tADC: // ADC (register) T1
697 case ARM::tADDi3: // ADD (immediate) T1
698 case ARM::tADDi8: // ADD (immediate) T2
699 case ARM::tADDrr: // ADD (register) T1
700 case ARM::tAND: // AND (register) T1
701 case ARM::tASRri: // ASR (immediate) T1
702 case ARM::tASRrr: // ASR (register) T1
703 case ARM::tBIC: // BIC (register) T1
704 case ARM::tEOR: // EOR (register) T1
705 case ARM::tLSLri: // LSL (immediate) T1
706 case ARM::tLSLrr: // LSL (register) T1
707 case ARM::tLSRri: // LSR (immediate) T1
708 case ARM::tLSRrr: // LSR (register) T1
709 case ARM::tMUL: // MUL T1
710 case ARM::tMVN: // MVN (register) T1
711 case ARM::tORR: // ORR (register) T1
712 case ARM::tROR: // ROR (register) T1
713 case ARM::tRSB: // RSB (immediate) T1
714 case ARM::tSBC: // SBC (register) T1
715 case ARM::tSUBi3: // SUB (immediate) T1
716 case ARM::tSUBi8: // SUB (immediate) T2
717 case ARM::tSUBrr: // SUB (register) T1
718 return !ARMBaseInstrInfo::isCPSRDefined(*MI);
719 }
720}
721
722/// isPredicable - Return true if the specified instruction can be predicated.
723/// By default, this returns true for every instruction with a
724/// PredicateOperand.
725bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
726 if (!MI.isPredicable())
727 return false;
728
729 if (MI.isBundle())
730 return false;
731
732 if (!isEligibleForITBlock(&MI))
733 return false;
734
735 const MachineFunction *MF = MI.getParent()->getParent();
736 const ARMFunctionInfo *AFI =
737 MF->getInfo<ARMFunctionInfo>();
738
739 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
740 // In their ARM encoding, they can't be encoded in a conditional form.
741 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
742 return false;
743
744 // Make indirect control flow changes unpredicable when SLS mitigation is
745 // enabled.
746 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
747 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
748 return false;
749 if (ST.hardenSlsBlr() && isIndirectCall(MI))
750 return false;
751
752 if (AFI->isThumb2Function()) {
753 if (getSubtarget().restrictIT())
754 return isV8EligibleForIT(&MI);
755 }
756
757 return true;
758}
759
760namespace llvm {
761
762template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
763 for (const MachineOperand &MO : MI->operands()) {
764 if (!MO.isReg() || MO.isUndef() || MO.isUse())
765 continue;
766 if (MO.getReg() != ARM::CPSR)
767 continue;
768 if (!MO.isDead())
769 return false;
770 }
771 // all definitions of CPSR are dead
772 return true;
773}
774
775} // end namespace llvm
776
777/// GetInstSize - Return the size of the specified MachineInstr.
778///
779unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
780 const MachineBasicBlock &MBB = *MI.getParent();
781 const MachineFunction *MF = MBB.getParent();
782 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
783
784 const MCInstrDesc &MCID = MI.getDesc();
785
786 switch (MI.getOpcode()) {
787 default:
788 // Return the size specified in .td file. If there's none, return 0, as we
789 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
790 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
791 // contrast to AArch64 instructions which have a default size of 4 bytes for
792 // example.
793 return MCID.getSize();
794 case TargetOpcode::BUNDLE:
795 return getInstBundleLength(MI);
796 case ARM::CONSTPOOL_ENTRY:
797 case ARM::JUMPTABLE_INSTS:
798 case ARM::JUMPTABLE_ADDRS:
799 case ARM::JUMPTABLE_TBB:
800 case ARM::JUMPTABLE_TBH:
801 // If this machine instr is a constant pool entry, its size is recorded as
802 // operand #2.
803 return MI.getOperand(2).getImm();
804 case ARM::SPACE:
805 return MI.getOperand(1).getImm();
806 case ARM::INLINEASM:
807 case ARM::INLINEASM_BR: {
808 // If this machine instr is an inline asm, measure it.
809 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
810 if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
811 Size = alignTo(Size, 4);
812 return Size;
813 }
814 }
815}
816
817unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
818 unsigned Size = 0;
819 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
820 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
821 while (++I != E && I->isInsideBundle()) {
822 assert(!I->isBundle() && "No nested bundle!");
823 Size += getInstSizeInBytes(*I);
824 }
825 return Size;
826}
827
828void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
829 MachineBasicBlock::iterator I,
830 unsigned DestReg, bool KillSrc,
831 const ARMSubtarget &Subtarget) const {
832 unsigned Opc = Subtarget.isThumb()
833 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
834 : ARM::MRS;
835
836 MachineInstrBuilder MIB =
837 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
838
839 // There is only 1 A/R class MRS instruction, and it always refers to
840 // APSR. However, there are lots of other possibilities on M-class cores.
841 if (Subtarget.isMClass())
842 MIB.addImm(0x800);
843
844 MIB.add(predOps(ARMCC::AL))
845 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
846}
847
848void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
849 MachineBasicBlock::iterator I,
850 unsigned SrcReg, bool KillSrc,
851 const ARMSubtarget &Subtarget) const {
852 unsigned Opc = Subtarget.isThumb()
853 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
854 : ARM::MSR;
855
856 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
857
858 if (Subtarget.isMClass())
859 MIB.addImm(0x800);
860 else
861 MIB.addImm(8);
862
863 MIB.addReg(SrcReg, getKillRegState(KillSrc))
864 .add(predOps(ARMCC::AL))
865 .addReg(ARM::CPSR, RegState::ImplicitDefine);
866}
867
868void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
869 MIB.addImm(ARMVCC::None);
870 MIB.addReg(0);
871 MIB.addReg(0); // tp_reg
872}
873
874void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
875 Register DestReg) {
876 addUnpredicatedMveVpredNOp(MIB);
877 MIB.addReg(DestReg, RegState::Undef);
878}
879
880void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
881 MIB.addImm(Cond);
882 MIB.addReg(ARM::VPR, RegState::Implicit);
883 MIB.addReg(0); // tp_reg
884}
885
886void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
887 unsigned Cond, unsigned Inactive) {
888 addPredicatedMveVpredNOp(MIB, Cond);
889 MIB.addReg(Inactive);
890}
891
892void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
893 MachineBasicBlock::iterator I,
894 const DebugLoc &DL, MCRegister DestReg,
895 MCRegister SrcReg, bool KillSrc) const {
896 bool GPRDest = ARM::GPRRegClass.contains(DestReg);
897 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
898
899 if (GPRDest && GPRSrc) {
900 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
901 .addReg(SrcReg, getKillRegState(KillSrc))
903 .add(condCodeOp());
904 return;
905 }
906
907 bool SPRDest = ARM::SPRRegClass.contains(DestReg);
908 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
909
910 unsigned Opc = 0;
911 if (SPRDest && SPRSrc)
912 Opc = ARM::VMOVS;
913 else if (GPRDest && SPRSrc)
914 Opc = ARM::VMOVRS;
915 else if (SPRDest && GPRSrc)
916 Opc = ARM::VMOVSR;
917 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
918 Opc = ARM::VMOVD;
919 else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
920 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
921
922 if (Opc) {
923 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
924 MIB.addReg(SrcReg, getKillRegState(KillSrc));
925 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
926 MIB.addReg(SrcReg, getKillRegState(KillSrc));
927 if (Opc == ARM::MVE_VORR)
928 addUnpredicatedMveVpredROp(MIB, DestReg);
929 else if (Opc != ARM::MQPRCopy)
930 MIB.add(predOps(ARMCC::AL));
931 return;
932 }
933
934 // Handle register classes that require multiple instructions.
935 unsigned BeginIdx = 0;
936 unsigned SubRegs = 0;
937 int Spacing = 1;
938
939 // Use VORRq when possible.
940 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
941 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
942 BeginIdx = ARM::qsub_0;
943 SubRegs = 2;
944 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
945 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
946 BeginIdx = ARM::qsub_0;
947 SubRegs = 4;
948 // Fall back to VMOVD.
949 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
950 Opc = ARM::VMOVD;
951 BeginIdx = ARM::dsub_0;
952 SubRegs = 2;
953 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
954 Opc = ARM::VMOVD;
955 BeginIdx = ARM::dsub_0;
956 SubRegs = 3;
957 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
958 Opc = ARM::VMOVD;
959 BeginIdx = ARM::dsub_0;
960 SubRegs = 4;
961 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
962 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
963 BeginIdx = ARM::gsub_0;
964 SubRegs = 2;
965 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
966 Opc = ARM::VMOVD;
967 BeginIdx = ARM::dsub_0;
968 SubRegs = 2;
969 Spacing = 2;
970 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
971 Opc = ARM::VMOVD;
972 BeginIdx = ARM::dsub_0;
973 SubRegs = 3;
974 Spacing = 2;
975 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
976 Opc = ARM::VMOVD;
977 BeginIdx = ARM::dsub_0;
978 SubRegs = 4;
979 Spacing = 2;
980 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
981 !Subtarget.hasFP64()) {
982 Opc = ARM::VMOVS;
983 BeginIdx = ARM::ssub_0;
984 SubRegs = 2;
985 } else if (SrcReg == ARM::CPSR) {
986 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
987 return;
988 } else if (DestReg == ARM::CPSR) {
989 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
990 return;
991 } else if (DestReg == ARM::VPR) {
992 assert(ARM::GPRRegClass.contains(SrcReg));
993 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
994 .addReg(SrcReg, getKillRegState(KillSrc))
996 return;
997 } else if (SrcReg == ARM::VPR) {
998 assert(ARM::GPRRegClass.contains(DestReg));
999 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
1000 .addReg(SrcReg, getKillRegState(KillSrc))
1002 return;
1003 } else if (DestReg == ARM::FPSCR_NZCV) {
1004 assert(ARM::GPRRegClass.contains(SrcReg));
1005 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
1006 .addReg(SrcReg, getKillRegState(KillSrc))
1008 return;
1009 } else if (SrcReg == ARM::FPSCR_NZCV) {
1010 assert(ARM::GPRRegClass.contains(DestReg));
1011 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
1012 .addReg(SrcReg, getKillRegState(KillSrc))
1014 return;
1015 }
1016
1017 assert(Opc && "Impossible reg-to-reg copy");
1018
1019 const TargetRegisterInfo *TRI = &getRegisterInfo();
1020 MachineInstrBuilder Mov;
1021
1022 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
1023 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
1024 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
1025 Spacing = -Spacing;
1026 }
1027#ifndef NDEBUG
1028 SmallSet<unsigned, 4> DstRegs;
1029#endif
1030 for (unsigned i = 0; i != SubRegs; ++i) {
1031 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
1032 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
1033 assert(Dst && Src && "Bad sub-register");
1034#ifndef NDEBUG
1035 assert(!DstRegs.count(Src) && "destructive vector copy");
1036 DstRegs.insert(Dst);
1037#endif
1038 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
1039 // VORR (NEON or MVE) takes two source operands.
1040 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
1041 Mov.addReg(Src);
1042 }
1043 // MVE VORR takes predicate operands in place of an ordinary condition.
1044 if (Opc == ARM::MVE_VORR)
1046 else
1047 Mov = Mov.add(predOps(ARMCC::AL));
1048 // MOVr can set CC.
1049 if (Opc == ARM::MOVr)
1050 Mov = Mov.add(condCodeOp());
1051 }
1052 // Add implicit super-register defs and kills to the last instruction.
1053 Mov->addRegisterDefined(DestReg, TRI);
1054 if (KillSrc)
1055 Mov->addRegisterKilled(SrcReg, TRI);
1056}
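// For illustration (an editorial sketch, not part of the upstream source):
// a QQPR copy on a NEON target takes the multi-instruction path above and is
// emitted roughly as
//   VORRq q4, q0, q0
//   VORRq q5, q1, q1
// iterating the qsub_0/qsub_1 sub-registers, reversing the order when the
// destination overlaps the source, and attaching the implicit super-register
// def (and kill) to the last move.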
1057
1058std::optional<DestSourcePair>
1059ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1060 // VMOVRRD is also a copy instruction, but it requires
1061 // special handling: it is a more complex form of copy, so
1062 // we do not consider it here. To recognize such instructions,
1063 // the isExtractSubregLike MI interface function could be
1064 // used instead.
1065 // VORRq is considered as a move only if two inputs are
1066 // the same register.
1067 if (!MI.isMoveReg() ||
1068 (MI.getOpcode() == ARM::VORRq &&
1069 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
1070 return std::nullopt;
1071 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1072}
1073
1074std::optional<ParamLoadedValue>
1075ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
1076 Register Reg) const {
1077 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
1078 Register DstReg = DstSrcPair->Destination->getReg();
1079
1080 // TODO: We don't handle cases where the forwarding reg is narrower/wider
1081 // than the copy registers. Consider for example:
1082 //
1083 // s16 = VMOVS s0
1084 // s17 = VMOVS s1
1085 // call @callee(d0)
1086 //
1087 // We'd like to describe the call site value of d0 as d8, but this requires
1088 // gathering and merging the descriptions for the two VMOVS instructions.
1089 //
1090 // We also don't handle the reverse situation, where the forwarding reg is
1091 // narrower than the copy destination:
1092 //
1093 // d8 = VMOVD d0
1094 // call @callee(s1)
1095 //
1096 // We need to produce a fragment description (the call site value of s1 is
1097 // /not/ just d8).
1098 if (DstReg != Reg)
1099 return std::nullopt;
1100 }
1101 return TargetInstrInfo::describeLoadedValue(MI, Reg);
1102}
1103
1104const MachineInstrBuilder &
1105ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
1106 unsigned SubIdx, unsigned State,
1107 const TargetRegisterInfo *TRI) const {
1108 if (!SubIdx)
1109 return MIB.addReg(Reg, State);
1110
1111 if (Register::isPhysicalRegister(Reg))
1112 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1113 return MIB.addReg(Reg, State, SubIdx);
1114}
1115
1116void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
1117 MachineBasicBlock::iterator I,
1118 Register SrcReg, bool isKill, int FI,
1119 const TargetRegisterClass *RC,
1120 const TargetRegisterInfo *TRI,
1121 Register VReg) const {
1122 MachineFunction &MF = *MBB.getParent();
1123 MachineFrameInfo &MFI = MF.getFrameInfo();
1124 Align Alignment = MFI.getObjectAlign(FI);
1125
1126 MachineMemOperand *MMO = MF.getMachineMemOperand(
1127 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
1128 MFI.getObjectSize(FI), Alignment);
1129
1130 switch (TRI->getSpillSize(*RC)) {
1131 case 2:
1132 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1133 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
1134 .addReg(SrcReg, getKillRegState(isKill))
1135 .addFrameIndex(FI)
1136 .addImm(0)
1137 .addMemOperand(MMO)
1139 } else
1140 llvm_unreachable("Unknown reg class!");
1141 break;
1142 case 4:
1143 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1144 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1145 .addReg(SrcReg, getKillRegState(isKill))
1146 .addFrameIndex(FI)
1147 .addImm(0)
1148 .addMemOperand(MMO)
1150 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1151 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1152 .addReg(SrcReg, getKillRegState(isKill))
1153 .addFrameIndex(FI)
1154 .addImm(0)
1155 .addMemOperand(MMO)
1157 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1158 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
1159 .addReg(SrcReg, getKillRegState(isKill))
1160 .addFrameIndex(FI)
1161 .addImm(0)
1162 .addMemOperand(MMO)
1164 } else
1165 llvm_unreachable("Unknown reg class!");
1166 break;
1167 case 8:
1168 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1169 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1170 .addReg(SrcReg, getKillRegState(isKill))
1171 .addFrameIndex(FI)
1172 .addImm(0)
1173 .addMemOperand(MMO)
1175 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1176 if (Subtarget.hasV5TEOps()) {
1177 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1178 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1179 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1180 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1182 } else {
1183 // Fallback to STM instruction, which has existed since the dawn of
1184 // time.
1185 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1186 .addFrameIndex(FI)
1187 .addMemOperand(MMO)
1189 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1190 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1191 }
1192 } else
1193 llvm_unreachable("Unknown reg class!");
1194 break;
1195 case 16:
1196 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1197 // Use aligned spills if the stack can be realigned.
1198 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1199 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1200 .addFrameIndex(FI)
1201 .addImm(16)
1202 .addReg(SrcReg, getKillRegState(isKill))
1203 .addMemOperand(MMO)
1205 } else {
1206 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1207 .addReg(SrcReg, getKillRegState(isKill))
1208 .addFrameIndex(FI)
1209 .addMemOperand(MMO)
1211 }
1212 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1213 Subtarget.hasMVEIntegerOps()) {
1214 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1215 MIB.addReg(SrcReg, getKillRegState(isKill))
1216 .addFrameIndex(FI)
1217 .addImm(0)
1218 .addMemOperand(MMO);
1220 } else
1221 llvm_unreachable("Unknown reg class!");
1222 break;
1223 case 24:
1224 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1225 // Use aligned spills if the stack can be realigned.
1226 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1227 Subtarget.hasNEON()) {
1228 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1229 .addFrameIndex(FI)
1230 .addImm(16)
1231 .addReg(SrcReg, getKillRegState(isKill))
1232 .addMemOperand(MMO)
1234 } else {
1236 get(ARM::VSTMDIA))
1237 .addFrameIndex(FI)
1239 .addMemOperand(MMO);
1240 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1241 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1242 AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1243 }
1244 } else
1245 llvm_unreachable("Unknown reg class!");
1246 break;
1247 case 32:
1248 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1249 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1250 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1251 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1252 Subtarget.hasNEON()) {
1253 // FIXME: It's possible to only store part of the QQ register if the
1254 // spilled def has a sub-register index.
1255 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1256 .addFrameIndex(FI)
1257 .addImm(16)
1258 .addReg(SrcReg, getKillRegState(isKill))
1259 .addMemOperand(MMO)
1261 } else if (Subtarget.hasMVEIntegerOps()) {
1262 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1263 .addReg(SrcReg, getKillRegState(isKill))
1264 .addFrameIndex(FI)
1265 .addMemOperand(MMO);
1266 } else {
1268 get(ARM::VSTMDIA))
1269 .addFrameIndex(FI)
1271 .addMemOperand(MMO);
1272 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1273 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1274 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1275 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1276 }
1277 } else
1278 llvm_unreachable("Unknown reg class!");
1279 break;
1280 case 64:
1281 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1282 Subtarget.hasMVEIntegerOps()) {
1283 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1284 .addReg(SrcReg, getKillRegState(isKill))
1285 .addFrameIndex(FI)
1286 .addMemOperand(MMO);
1287 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1288 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1289 .addFrameIndex(FI)
1291 .addMemOperand(MMO);
1292 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1293 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1294 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1295 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1296 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1297 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1298 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1299 AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1300 } else
1301 llvm_unreachable("Unknown reg class!");
1302 break;
1303 default:
1304 llvm_unreachable("Unknown reg class!");
1305 }
1306}
1307
1308Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1309 int &FrameIndex) const {
1310 switch (MI.getOpcode()) {
1311 default: break;
1312 case ARM::STRrs:
1313 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1314 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1315 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1316 MI.getOperand(3).getImm() == 0) {
1317 FrameIndex = MI.getOperand(1).getIndex();
1318 return MI.getOperand(0).getReg();
1319 }
1320 break;
1321 case ARM::STRi12:
1322 case ARM::t2STRi12:
1323 case ARM::tSTRspi:
1324 case ARM::VSTRD:
1325 case ARM::VSTRS:
1326 case ARM::VSTR_P0_off:
1327 case ARM::MVE_VSTRWU32:
1328 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1329 MI.getOperand(2).getImm() == 0) {
1330 FrameIndex = MI.getOperand(1).getIndex();
1331 return MI.getOperand(0).getReg();
1332 }
1333 break;
1334 case ARM::VST1q64:
1335 case ARM::VST1d64TPseudo:
1336 case ARM::VST1d64QPseudo:
1337 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1338 FrameIndex = MI.getOperand(0).getIndex();
1339 return MI.getOperand(2).getReg();
1340 }
1341 break;
1342 case ARM::VSTMQIA:
1343 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1344 FrameIndex = MI.getOperand(1).getIndex();
1345 return MI.getOperand(0).getReg();
1346 }
1347 break;
1348 case ARM::MQQPRStore:
1349 case ARM::MQQQQPRStore:
1350 if (MI.getOperand(1).isFI()) {
1351 FrameIndex = MI.getOperand(1).getIndex();
1352 return MI.getOperand(0).getReg();
1353 }
1354 break;
1355 }
1356
1357 return 0;
1358}
1359
1360bool ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1361 int &FrameIndex) const {
1362 SmallVector<const MachineMemOperand *, 1> Accesses;
1363 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1364 Accesses.size() == 1) {
1365 FrameIndex =
1366 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1367 ->getFrameIndex();
1368 return true;
1369 }
1370 return false;
1371}
1372
1373void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
1374 MachineBasicBlock::iterator I,
1375 Register DestReg, int FI,
1376 const TargetRegisterClass *RC,
1377 const TargetRegisterInfo *TRI,
1378 Register VReg) const {
1379 DebugLoc DL;
1380 if (I != MBB.end()) DL = I->getDebugLoc();
1381 MachineFunction &MF = *MBB.getParent();
1382 MachineFrameInfo &MFI = MF.getFrameInfo();
1383 const Align Alignment = MFI.getObjectAlign(FI);
1384 MachineMemOperand *MMO = MF.getMachineMemOperand(
1385 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1386 MFI.getObjectSize(FI), Alignment);
1387
1388 switch (TRI->getSpillSize(*RC)) {
1389 case 2:
1390 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1391 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1392 .addFrameIndex(FI)
1393 .addImm(0)
1394 .addMemOperand(MMO)
1396 } else
1397 llvm_unreachable("Unknown reg class!");
1398 break;
1399 case 4:
1400 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1401 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1402 .addFrameIndex(FI)
1403 .addImm(0)
1404 .addMemOperand(MMO)
1406 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1407 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1408 .addFrameIndex(FI)
1409 .addImm(0)
1410 .addMemOperand(MMO)
1412 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1413 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1414 .addFrameIndex(FI)
1415 .addImm(0)
1416 .addMemOperand(MMO)
1418 } else
1419 llvm_unreachable("Unknown reg class!");
1420 break;
1421 case 8:
1422 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1423 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1424 .addFrameIndex(FI)
1425 .addImm(0)
1426 .addMemOperand(MMO)
1428 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1429 MachineInstrBuilder MIB;
1430
1431 if (Subtarget.hasV5TEOps()) {
1432 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1433 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1434 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1435 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1437 } else {
1438 // Fallback to LDM instruction, which has existed since the dawn of
1439 // time.
1440 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1441 .addFrameIndex(FI)
1442 .addMemOperand(MMO)
1444 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1445 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1446 }
1447
1448 if (DestReg.isPhysical())
1449 MIB.addReg(DestReg, RegState::ImplicitDefine);
1450 } else
1451 llvm_unreachable("Unknown reg class!");
1452 break;
1453 case 16:
1454 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1455 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1456 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1457 .addFrameIndex(FI)
1458 .addImm(16)
1459 .addMemOperand(MMO)
1461 } else {
1462 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1463 .addFrameIndex(FI)
1464 .addMemOperand(MMO)
1466 }
1467 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1468 Subtarget.hasMVEIntegerOps()) {
1469 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1470 MIB.addFrameIndex(FI)
1471 .addImm(0)
1472 .addMemOperand(MMO);
1474 } else
1475 llvm_unreachable("Unknown reg class!");
1476 break;
1477 case 24:
1478 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1479 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1480 Subtarget.hasNEON()) {
1481 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1482 .addFrameIndex(FI)
1483 .addImm(16)
1484 .addMemOperand(MMO)
1486 } else {
1487 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1488 .addFrameIndex(FI)
1489 .addMemOperand(MMO)
1491 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1492 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1493 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1494 if (DestReg.isPhysical())
1495 MIB.addReg(DestReg, RegState::ImplicitDefine);
1496 }
1497 } else
1498 llvm_unreachable("Unknown reg class!");
1499 break;
1500 case 32:
1501 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1502 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1503 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1504 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1505 Subtarget.hasNEON()) {
1506 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1507 .addFrameIndex(FI)
1508 .addImm(16)
1509 .addMemOperand(MMO)
1511 } else if (Subtarget.hasMVEIntegerOps()) {
1512 BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1513 .addFrameIndex(FI)
1514 .addMemOperand(MMO);
1515 } else {
1516 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1517 .addFrameIndex(FI)
1519 .addMemOperand(MMO);
1520 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1521 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1522 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1523 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1524 if (DestReg.isPhysical())
1525 MIB.addReg(DestReg, RegState::ImplicitDefine);
1526 }
1527 } else
1528 llvm_unreachable("Unknown reg class!");
1529 break;
1530 case 64:
1531 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1532 Subtarget.hasMVEIntegerOps()) {
1533 BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1534 .addFrameIndex(FI)
1535 .addMemOperand(MMO);
1536 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1537 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1538 .addFrameIndex(FI)
1540 .addMemOperand(MMO);
1541 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1542 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1543 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1544 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1545 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1546 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1547 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1548 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1549 if (DestReg.isPhysical())
1550 MIB.addReg(DestReg, RegState::ImplicitDefine);
1551 } else
1552 llvm_unreachable("Unknown reg class!");
1553 break;
1554 default:
1555 llvm_unreachable("Unknown regclass!");
1556 }
1557}
1558
1559Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1560 int &FrameIndex) const {
1561 switch (MI.getOpcode()) {
1562 default: break;
1563 case ARM::LDRrs:
1564 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1565 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1566 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1567 MI.getOperand(3).getImm() == 0) {
1568 FrameIndex = MI.getOperand(1).getIndex();
1569 return MI.getOperand(0).getReg();
1570 }
1571 break;
1572 case ARM::LDRi12:
1573 case ARM::t2LDRi12:
1574 case ARM::tLDRspi:
1575 case ARM::VLDRD:
1576 case ARM::VLDRS:
1577 case ARM::VLDR_P0_off:
1578 case ARM::MVE_VLDRWU32:
1579 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1580 MI.getOperand(2).getImm() == 0) {
1581 FrameIndex = MI.getOperand(1).getIndex();
1582 return MI.getOperand(0).getReg();
1583 }
1584 break;
1585 case ARM::VLD1q64:
1586 case ARM::VLD1d8TPseudo:
1587 case ARM::VLD1d16TPseudo:
1588 case ARM::VLD1d32TPseudo:
1589 case ARM::VLD1d64TPseudo:
1590 case ARM::VLD1d8QPseudo:
1591 case ARM::VLD1d16QPseudo:
1592 case ARM::VLD1d32QPseudo:
1593 case ARM::VLD1d64QPseudo:
1594 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1595 FrameIndex = MI.getOperand(1).getIndex();
1596 return MI.getOperand(0).getReg();
1597 }
1598 break;
1599 case ARM::VLDMQIA:
1600 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1601 FrameIndex = MI.getOperand(1).getIndex();
1602 return MI.getOperand(0).getReg();
1603 }
1604 break;
1605 case ARM::MQQPRLoad:
1606 case ARM::MQQQQPRLoad:
1607 if (MI.getOperand(1).isFI()) {
1608 FrameIndex = MI.getOperand(1).getIndex();
1609 return MI.getOperand(0).getReg();
1610 }
1611 break;
1612 }
1613
1614 return 0;
1615}
1616
1617bool ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1618 int &FrameIndex) const {
1619 SmallVector<const MachineMemOperand *, 1> Accesses;
1620 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1621 Accesses.size() == 1) {
1622 FrameIndex =
1623 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1624 ->getFrameIndex();
1625 return true;
1626 }
1627 return false;
1628}
1629
1630/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1631/// depending on whether the result is used.
1632void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1633 bool isThumb1 = Subtarget.isThumb1Only();
1634 bool isThumb2 = Subtarget.isThumb2();
1635 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1636
1637 DebugLoc dl = MI->getDebugLoc();
1638 MachineBasicBlock *BB = MI->getParent();
1639
1640 MachineInstrBuilder LDM, STM;
1641 if (isThumb1 || !MI->getOperand(1).isDead()) {
1642 MachineOperand LDWb(MI->getOperand(1));
1643 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1644 : isThumb1 ? ARM::tLDMIA_UPD
1645 : ARM::LDMIA_UPD))
1646 .add(LDWb);
1647 } else {
1648 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1649 }
1650
1651 if (isThumb1 || !MI->getOperand(0).isDead()) {
1652 MachineOperand STWb(MI->getOperand(0));
1653 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1654 : isThumb1 ? ARM::tSTMIA_UPD
1655 : ARM::STMIA_UPD))
1656 .add(STWb);
1657 } else {
1658 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1659 }
1660
1661 MachineOperand LDBase(MI->getOperand(3));
1662 LDM.add(LDBase).add(predOps(ARMCC::AL));
1663
1664 MachineOperand STBase(MI->getOperand(2));
1665 STM.add(STBase).add(predOps(ARMCC::AL));
1666
1667 // Sort the scratch registers into ascending order.
1668 const TargetRegisterInfo &TRI = getRegisterInfo();
1669 SmallVector<unsigned, 6> ScratchRegs;
1670 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1671 ScratchRegs.push_back(MO.getReg());
1672 llvm::sort(ScratchRegs,
1673 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1674 return TRI.getEncodingValue(Reg1) <
1675 TRI.getEncodingValue(Reg2);
1676 });
1677
1678 for (const auto &Reg : ScratchRegs) {
1679 LDM.addReg(Reg, RegState::Define);
1680 STM.addReg(Reg, RegState::Kill);
1681 }
1682
1683 BB->erase(MI);
1684}
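// For illustration (an editorial sketch, not part of the upstream source):
// a Thumb2 MEMCPY of four words whose updated pointers are still live expands
// roughly into a t2LDMIA_UPD from the source pointer followed by a
// t2STMIA_UPD to the destination pointer, with the scratch registers sorted
// by encoding so the register lists are ascending; dead pointer results pick
// the plain t2LDMIA/t2STMIA forms instead.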
1685
1686bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1687 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1688 expandLoadStackGuard(MI);
1689 MI.getParent()->erase(MI);
1690 return true;
1691 }
1692
1693 if (MI.getOpcode() == ARM::MEMCPY) {
1694 expandMEMCPY(MI);
1695 return true;
1696 }
1697
1698 // This hook gets to expand COPY instructions before they become
1699 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1700 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1701 // changed into a VORR that can go down the NEON pipeline.
1702 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1703 return false;
1704
1705 // Look for a copy between even S-registers. That is where we keep floats
1706 // when using NEON v2f32 instructions for f32 arithmetic.
1707 Register DstRegS = MI.getOperand(0).getReg();
1708 Register SrcRegS = MI.getOperand(1).getReg();
1709 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1710 return false;
1711
1712 const TargetRegisterInfo *TRI = &getRegisterInfo();
1713 unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1714 &ARM::DPRRegClass);
1715 unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1716 &ARM::DPRRegClass);
1717 if (!DstRegD || !SrcRegD)
1718 return false;
1719
1720 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1721 // legal if the COPY already defines the full DstRegD, and it isn't a
1722 // sub-register insertion.
1723 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1724 return false;
1725
1726 // A dead copy shouldn't show up here, but reject it just in case.
1727 if (MI.getOperand(0).isDead())
1728 return false;
1729
1730 // All clear, widen the COPY.
1731 LLVM_DEBUG(dbgs() << "widening: " << MI);
1732 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1733
1734 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1735 // or some other super-register.
1736 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1737 if (ImpDefIdx != -1)
1738 MI.removeOperand(ImpDefIdx);
1739
1740 // Change the opcode and operands.
1741 MI.setDesc(get(ARM::VMOVD));
1742 MI.getOperand(0).setReg(DstRegD);
1743 MI.getOperand(1).setReg(SrcRegD);
1744 MIB.add(predOps(ARMCC::AL));
1745
1746 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1747 // register scavenger and machine verifier, so we need to indicate that we
1748 // are reading an undefined value from SrcRegD, but a proper value from
1749 // SrcRegS.
1750 MI.getOperand(1).setIsUndef();
1751 MIB.addReg(SrcRegS, RegState::Implicit);
1752
1753 // SrcRegD may actually contain an unrelated value in the ssub_1
1754 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1755 if (MI.getOperand(1).isKill()) {
1756 MI.getOperand(1).setIsKill(false);
1757 MI.addRegisterKilled(SrcRegS, TRI, true);
1758 }
1759
1760 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1761 return true;
1762}
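// For illustration (an editorial sketch, not part of the upstream source):
// a copy like "$s0 = COPY killed $s2" can be widened by the code above into
// roughly "$d0 = VMOVD undef $d1, implicit killed $s2", letting the move be
// turned into a VORR later so it can stay on the NEON pipeline.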
1763
1764/// Create a copy of a const pool value. Update CPI to the new index and return
1765/// the label UID.
1766static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1767 MachineConstantPool *MCP = MF.getConstantPool();
1768 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1769
1770 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1771 assert(MCPE.isMachineConstantPoolEntry() &&
1772 "Expecting a machine constantpool entry!");
1773 ARMConstantPoolValue *ACPV =
1774 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1775
1776 unsigned PCLabelId = AFI->createPICLabelUId();
1777 ARMConstantPoolValue *NewCPV = nullptr;
1778
1779 // FIXME: The below assumes PIC relocation model and that the function
1780 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1781 // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
1782 // instructions, so that's probably OK, but is PIC always correct when
1783 // we get here?
1784 if (ACPV->isGlobalValue())
1785 NewCPV = ARMConstantPoolConstant::Create(
1786 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1787 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1788 else if (ACPV->isExtSymbol())
1789 NewCPV = ARMConstantPoolSymbol::
1790 Create(MF.getFunction().getContext(),
1791 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1792 else if (ACPV->isBlockAddress())
1793 NewCPV = ARMConstantPoolConstant::
1794 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1795 ARMCP::CPBlockAddress, 4);
1796 else if (ACPV->isLSDA())
1797 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1798 ARMCP::CPLSDA, 4);
1799 else if (ACPV->isMachineBasicBlock())
1800 NewCPV = ARMConstantPoolMBB::
1801 Create(MF.getFunction().getContext(),
1802 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1803 else
1804 llvm_unreachable("Unexpected ARM constantpool value type!!");
1805 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1806 return PCLabelId;
1807}
1808
1809void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1810 MachineBasicBlock::iterator I,
1811 Register DestReg, unsigned SubIdx,
1812 const MachineInstr &Orig,
1813 const TargetRegisterInfo &TRI) const {
1814 unsigned Opcode = Orig.getOpcode();
1815 switch (Opcode) {
1816 default: {
1817 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1818 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1819 MBB.insert(I, MI);
1820 break;
1821 }
1822 case ARM::tLDRpci_pic:
1823 case ARM::t2LDRpci_pic: {
1824 MachineFunction &MF = *MBB.getParent();
1825 unsigned CPI = Orig.getOperand(1).getIndex();
1826 unsigned PCLabelId = duplicateCPV(MF, CPI);
1827 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1828 .addConstantPoolIndex(CPI)
1829 .addImm(PCLabelId)
1830 .cloneMemRefs(Orig);
1831 break;
1832 }
1833 }
1834}
1835
1836MachineInstr &
1837ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1838 MachineBasicBlock::iterator InsertBefore,
1839 const MachineInstr &Orig) const {
1840 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1841 MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1842 for (;;) {
1843 switch (I->getOpcode()) {
1844 case ARM::tLDRpci_pic:
1845 case ARM::t2LDRpci_pic: {
1846 MachineFunction &MF = *MBB.getParent();
1847 unsigned CPI = I->getOperand(1).getIndex();
1848 unsigned PCLabelId = duplicateCPV(MF, CPI);
1849 I->getOperand(1).setIndex(CPI);
1850 I->getOperand(2).setImm(PCLabelId);
1851 break;
1852 }
1853 }
1854 if (!I->isBundledWithSucc())
1855 break;
1856 ++I;
1857 }
1858 return Cloned;
1859}
1860
1861bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1862 const MachineInstr &MI1,
1863 const MachineRegisterInfo *MRI) const {
1864 unsigned Opcode = MI0.getOpcode();
1865 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1866 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1867 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1868 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1869 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1870 Opcode == ARM::t2MOV_ga_pcrel) {
1871 if (MI1.getOpcode() != Opcode)
1872 return false;
1873 if (MI0.getNumOperands() != MI1.getNumOperands())
1874 return false;
1875
1876 const MachineOperand &MO0 = MI0.getOperand(1);
1877 const MachineOperand &MO1 = MI1.getOperand(1);
1878 if (MO0.getOffset() != MO1.getOffset())
1879 return false;
1880
1881 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1882 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1883 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1884 Opcode == ARM::t2MOV_ga_pcrel)
1885 // Ignore the PC labels.
1886 return MO0.getGlobal() == MO1.getGlobal();
1887
1888 const MachineFunction *MF = MI0.getParent()->getParent();
1889 const MachineConstantPool *MCP = MF->getConstantPool();
1890 int CPI0 = MO0.getIndex();
1891 int CPI1 = MO1.getIndex();
1892 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1893 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1894 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1895 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1896 if (isARMCP0 && isARMCP1) {
1897 ARMConstantPoolValue *ACPV0 =
1898 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1899 ARMConstantPoolValue *ACPV1 =
1900 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1901 return ACPV0->hasSameValue(ACPV1);
1902 } else if (!isARMCP0 && !isARMCP1) {
1903 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1904 }
1905 return false;
1906 } else if (Opcode == ARM::PICLDR) {
1907 if (MI1.getOpcode() != Opcode)
1908 return false;
1909 if (MI0.getNumOperands() != MI1.getNumOperands())
1910 return false;
1911
1912 Register Addr0 = MI0.getOperand(1).getReg();
1913 Register Addr1 = MI1.getOperand(1).getReg();
1914 if (Addr0 != Addr1) {
1915 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1916 return false;
1917
1918 // This assumes SSA form.
1919 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1920 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1921      // Check if the loaded values, e.g. a constant pool entry or a global
1922      // address, are the same.
1923 if (!produceSameValue(*Def0, *Def1, MRI))
1924 return false;
1925 }
1926
1927 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1928 // %12 = PICLDR %11, 0, 14, %noreg
1929 const MachineOperand &MO0 = MI0.getOperand(i);
1930 const MachineOperand &MO1 = MI1.getOperand(i);
1931 if (!MO0.isIdenticalTo(MO1))
1932 return false;
1933 }
1934 return true;
1935 }
1936
1937  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1938}
1939
1940/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1941/// determine if two loads are loading from the same base address. It should
1942/// only return true if the base pointers are the same and the only differences
1943/// between the two addresses is the offset. It also returns the offsets by
1944/// reference.
1945///
1946/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1947/// is permanently disabled.
1948bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1949                                               int64_t &Offset1,
1950 int64_t &Offset2) const {
1951 // Don't worry about Thumb: just ARM and Thumb2.
1952 if (Subtarget.isThumb1Only()) return false;
1953
1954 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1955 return false;
1956
1957 auto IsLoadOpcode = [&](unsigned Opcode) {
1958 switch (Opcode) {
1959 default:
1960 return false;
1961 case ARM::LDRi12:
1962 case ARM::LDRBi12:
1963 case ARM::LDRD:
1964 case ARM::LDRH:
1965 case ARM::LDRSB:
1966 case ARM::LDRSH:
1967 case ARM::VLDRD:
1968 case ARM::VLDRS:
1969 case ARM::t2LDRi8:
1970 case ARM::t2LDRBi8:
1971 case ARM::t2LDRDi8:
1972 case ARM::t2LDRSHi8:
1973 case ARM::t2LDRi12:
1974 case ARM::t2LDRBi12:
1975 case ARM::t2LDRSHi12:
1976 return true;
1977 }
1978 };
1979
1980 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1981 !IsLoadOpcode(Load2->getMachineOpcode()))
1982 return false;
1983
1984 // Check if base addresses and chain operands match.
1985 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1986 Load1->getOperand(4) != Load2->getOperand(4))
1987 return false;
1988
1989 // Index should be Reg0.
1990 if (Load1->getOperand(3) != Load2->getOperand(3))
1991 return false;
1992
1993 // Determine the offsets.
1994 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1995 isa<ConstantSDNode>(Load2->getOperand(1))) {
1996 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1997 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1998 return true;
1999 }
2000
2001 return false;
2002}
2003
2004/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
2005/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
2006 /// be scheduled together. On some targets if two loads are loading from
2007/// addresses in the same cache line, it's better if they are scheduled
2008/// together. This function takes two integers that represent the load offsets
2009/// from the common base address. It returns true if it decides it's desirable
2010/// to schedule the two loads together. "NumLoads" is the number of loads that
2011/// have already been scheduled after Load1.
2012///
2013/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
2014/// is permanently disabled.
2015bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
2016                                               int64_t Offset1, int64_t Offset2,
2017 unsigned NumLoads) const {
2018 // Don't worry about Thumb: just ARM and Thumb2.
2019 if (Subtarget.isThumb1Only()) return false;
2020
2021 assert(Offset2 > Offset1);
2022
2023 if ((Offset2 - Offset1) / 8 > 64)
2024 return false;
2025
2026 // Check if the machine opcodes are different. If they are different
2027 // then we consider them to not be of the same base address,
2028 // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
2029 // In this case, they are considered to be the same because they are different
2030 // encoding forms of the same basic instruction.
2031 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
2032 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
2033 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
2034 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
2035 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
2036 return false; // FIXME: overly conservative?
2037
2038 // Four loads in a row should be sufficient.
2039 if (NumLoads >= 3)
2040 return false;
2041
2042 return true;
2043}
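// A minimal standalone sketch of the proximity heuristic above (not part of
// the LLVM sources; the name is made up for illustration). The real hook also
// insists on matching machine opcodes, with the t2LDRBi8/t2LDRBi12 exception.
//
//   #include <cstdint>
//   // Offsets are from the common base; Offset1 < Offset2 is assumed.
//   static bool looksWorthSchedulingTogether(int64_t Offset1, int64_t Offset2,
//                                            unsigned NumLoadsAlreadyScheduled) {
//     if ((Offset2 - Offset1) / 8 > 64)      // too far apart to count as "near"
//       return false;
//     return NumLoadsAlreadyScheduled < 3;   // four loads in a row is enough
//   }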
2044
2045bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2046                                            const MachineBasicBlock *MBB,
2047 const MachineFunction &MF) const {
2048 // Debug info is never a scheduling boundary. It's necessary to be explicit
2049 // due to the special treatment of IT instructions below, otherwise a
2050 // dbg_value followed by an IT will result in the IT instruction being
2051 // considered a scheduling hazard, which is wrong. It should be the actual
2052 // instruction preceding the dbg_value instruction(s), just like it is
2053 // when debug info is not present.
2054 if (MI.isDebugInstr())
2055 return false;
2056
2057 // Terminators and labels can't be scheduled around.
2058 if (MI.isTerminator() || MI.isPosition())
2059 return true;
2060
2061 // INLINEASM_BR can jump to another block
2062 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
2063 return true;
2064
2065 if (isSEHInstruction(MI))
2066 return true;
2067
2068 // Treat the start of the IT block as a scheduling boundary, but schedule
2069 // t2IT along with all instructions following it.
2070 // FIXME: This is a big hammer. But the alternative is to add all potential
2071 // true and anti dependencies to IT block instructions as implicit operands
2072 // to the t2IT instruction. The added compile time and complexity does not
2073 // seem worth it.
2074  MachineBasicBlock::const_iterator I = MI;
2075  // Make sure to skip any debug instructions
2076 while (++I != MBB->end() && I->isDebugInstr())
2077 ;
2078 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
2079 return true;
2080
2081 // Don't attempt to schedule around any instruction that defines
2082 // a stack-oriented pointer, as it's unlikely to be profitable. This
2083 // saves compile time, because it doesn't require every single
2084 // stack slot reference to depend on the instruction that does the
2085 // modification.
2086 // Calls don't actually change the stack pointer, even if they have imp-defs.
2087 // No ARM calling conventions change the stack pointer. (X86 calling
2088 // conventions sometimes do).
2089 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
2090 return true;
2091
2092 return false;
2093}
2094
2095bool ARMBaseInstrInfo::
2096isProfitableToIfCvt(MachineBasicBlock &MBB,
2097                    unsigned NumCycles, unsigned ExtraPredCycles,
2098 BranchProbability Probability) const {
2099 if (!NumCycles)
2100 return false;
2101
2102 // If we are optimizing for size, see if the branch in the predecessor can be
2103 // lowered to cbn?z by the constant island lowering pass, and return false if
2104 // so. This results in a shorter instruction sequence.
2105 if (MBB.getParent()->getFunction().hasOptSize()) {
2106 MachineBasicBlock *Pred = *MBB.pred_begin();
2107 if (!Pred->empty()) {
2108 MachineInstr *LastMI = &*Pred->rbegin();
2109 if (LastMI->getOpcode() == ARM::t2Bcc) {
2110        const TargetRegisterInfo *TRI = &getRegisterInfo();
2111        MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
2112 if (CmpMI)
2113 return false;
2114 }
2115 }
2116 }
2117 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
2118 MBB, 0, 0, Probability);
2119}
2120
2121bool ARMBaseInstrInfo::
2122isProfitableToIfCvt(MachineBasicBlock &TBB,
2123                    unsigned TCycles, unsigned TExtra,
2124 MachineBasicBlock &FBB,
2125 unsigned FCycles, unsigned FExtra,
2126 BranchProbability Probability) const {
2127 if (!TCycles)
2128 return false;
2129
2130  // In Thumb code we often end up trading one branch for an IT block, and
2131  // if we are cloning, the duplicated instructions can increase code size.
2132  // Prevent blocks with multiple predecessors from being if-converted to
2133  // avoid this cloning.
2134 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
2135 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
2136 return false;
2137 }
2138
2139 // Attempt to estimate the relative costs of predication versus branching.
2140  // Here we scale up each component of UnpredCost to avoid precision issues when
2141 // scaling TCycles/FCycles by Probability.
2142 const unsigned ScalingUpFactor = 1024;
2143
2144 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
2145 unsigned UnpredCost;
2146 if (!Subtarget.hasBranchPredictor()) {
2147 // When we don't have a branch predictor it's always cheaper to not take a
2148 // branch than take it, so we have to take that into account.
2149 unsigned NotTakenBranchCost = 1;
2150 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2151 unsigned TUnpredCycles, FUnpredCycles;
2152 if (!FCycles) {
2153 // Triangle: TBB is the fallthrough
2154 TUnpredCycles = TCycles + NotTakenBranchCost;
2155 FUnpredCycles = TakenBranchCost;
2156 } else {
2157 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2158 TUnpredCycles = TCycles + TakenBranchCost;
2159 FUnpredCycles = FCycles + NotTakenBranchCost;
2160 // The branch at the end of FBB will disappear when it's predicated, so
2161 // discount it from PredCost.
2162 PredCost -= 1 * ScalingUpFactor;
2163 }
2164    // The total cost is the cost of each path scaled by their probabilities.
2165 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2166 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2167 UnpredCost = TUnpredCost + FUnpredCost;
2168 // When predicating assume that the first IT can be folded away but later
2169 // ones cost one cycle each
2170 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2171 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2172 }
2173 } else {
2174 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2175 unsigned FUnpredCost =
2176 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2177 UnpredCost = TUnpredCost + FUnpredCost;
2178 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2179 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2180 }
2181
2182 return PredCost <= UnpredCost;
2183}
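// A minimal standalone sketch of the "has branch predictor" cost comparison
// above (not part of the LLVM sources; the name and the probability
// representation are made up for illustration, and the extra predication
// cycles TExtra/FExtra are left out for brevity). Probability is passed as
// taken-count over total rather than as a BranchProbability.
//
//   #include <cstdint>
//   static bool profitableToPredicate(unsigned TCycles, unsigned FCycles,
//                                     unsigned TakenNum, unsigned TotalDen,
//                                     unsigned MispredictPenalty) {
//     const unsigned Scale = 1024;                       // fixed-point factor
//     unsigned PredCost = (TCycles + FCycles) * Scale;
//     unsigned TUnpred = (uint64_t)TCycles * Scale * TakenNum / TotalDen;
//     unsigned FUnpred =
//         (uint64_t)FCycles * Scale * (TotalDen - TakenNum) / TotalDen;
//     unsigned UnpredCost = TUnpred + FUnpred + 1 * Scale   // the branch itself
//                           + MispredictPenalty * Scale / 10;
//     return PredCost <= UnpredCost;
//   }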
2184
2185unsigned
2186ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2187                                                   unsigned NumInsts) const {
2188 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2189 // ARM has a condition code field in every predicable instruction, using it
2190 // doesn't change code size.
2191 if (!Subtarget.isThumb2())
2192 return 0;
2193
2194  // It's possible that the IT block is restricted to a single instruction.
2195 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2196 return divideCeil(NumInsts, MaxInsts) * 2;
2197}
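// Worked example for the size estimate above: predicating 6 instructions in
// Thumb-2 needs ceil(6/4) = 2 IT instructions, i.e. 4 extra bytes; with
// restrictIT (one instruction per IT block) it needs 6 ITs, i.e. 12 bytes.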
2198
2199unsigned
2200ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2201  // If this branch is likely to be folded into the comparison to form a
2202 // CB(N)Z, then removing it won't reduce code size at all, because that will
2203 // just replace the CB(N)Z with a CMP.
2204 if (MI.getOpcode() == ARM::t2Bcc &&
2205      findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2206    return 0;
2207
2208 unsigned Size = getInstSizeInBytes(MI);
2209
2210 // For Thumb2, all branches are 32-bit instructions during the if conversion
2211 // pass, but may be replaced with 16-bit instructions during size reduction.
2212 // Since the branches considered by if conversion tend to be forward branches
2213 // over small basic blocks, they are very likely to be in range for the
2214 // narrow instructions, so we assume the final code size will be half what it
2215 // currently is.
2216 if (Subtarget.isThumb2())
2217 Size /= 2;
2218
2219 return Size;
2220}
2221
2222bool
2223ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2224                                            MachineBasicBlock &FMBB) const {
2225 // Reduce false anti-dependencies to let the target's out-of-order execution
2226 // engine do its thing.
2227 return Subtarget.isProfitableToUnpredicate();
2228}
2229
2230/// getInstrPredicate - If instruction is predicated, returns its predicate
2231/// condition, otherwise returns AL. It also returns the condition code
2232/// register by reference.
2233ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2234                                         Register &PredReg) {
2235 int PIdx = MI.findFirstPredOperandIdx();
2236 if (PIdx == -1) {
2237 PredReg = 0;
2238 return ARMCC::AL;
2239 }
2240
2241 PredReg = MI.getOperand(PIdx+1).getReg();
2242 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2243}
2244
2245unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2246  if (Opc == ARM::B)
2247 return ARM::Bcc;
2248 if (Opc == ARM::tB)
2249 return ARM::tBcc;
2250 if (Opc == ARM::t2B)
2251 return ARM::t2Bcc;
2252
2253 llvm_unreachable("Unknown unconditional branch opcode!");
2254}
2255
2256MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2257                                                       bool NewMI,
2258 unsigned OpIdx1,
2259 unsigned OpIdx2) const {
2260 switch (MI.getOpcode()) {
2261 case ARM::MOVCCr:
2262 case ARM::t2MOVCCr: {
2263 // MOVCC can be commuted by inverting the condition.
2264 Register PredReg;
2265    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2266    // MOVCC AL can't be inverted. Shouldn't happen.
2267 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2268 return nullptr;
2269 MachineInstr *CommutedMI =
2270 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2271 if (!CommutedMI)
2272 return nullptr;
2273 // After swapping the MOVCC operands, also invert the condition.
2274 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2275        .setImm(ARMCC::getOppositeCondition(CC));
2276    return CommutedMI;
2277 }
2278 }
2279 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2280}
2281
2282/// Identify instructions that can be folded into a MOVCC instruction, and
2283/// return the defining instruction.
2284MachineInstr *
2285ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2286 const TargetInstrInfo *TII) const {
2287 if (!Reg.isVirtual())
2288 return nullptr;
2289 if (!MRI.hasOneNonDBGUse(Reg))
2290 return nullptr;
2291 MachineInstr *MI = MRI.getVRegDef(Reg);
2292 if (!MI)
2293 return nullptr;
2294 // Check if MI can be predicated and folded into the MOVCC.
2295 if (!isPredicable(*MI))
2296 return nullptr;
2297 // Check if MI has any non-dead defs or physreg uses. This also detects
2298 // predicated instructions which will be reading CPSR.
2299 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2300 // Reject frame index operands, PEI can't handle the predicated pseudos.
2301 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2302 return nullptr;
2303 if (!MO.isReg())
2304 continue;
2305 // MI can't have any tied operands, that would conflict with predication.
2306 if (MO.isTied())
2307 return nullptr;
2308 if (MO.getReg().isPhysical())
2309 return nullptr;
2310 if (MO.isDef() && !MO.isDead())
2311 return nullptr;
2312 }
2313 bool DontMoveAcrossStores = true;
2314 if (!MI->isSafeToMove(DontMoveAcrossStores))
2315 return nullptr;
2316 return MI;
2317}
2318
2319bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2320                                     SmallVectorImpl<MachineOperand> &Cond,
2321                                     unsigned &TrueOp, unsigned &FalseOp,
2322 bool &Optimizable) const {
2323 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2324 "Unknown select instruction");
2325 // MOVCC operands:
2326 // 0: Def.
2327 // 1: True use.
2328 // 2: False use.
2329 // 3: Condition code.
2330 // 4: CPSR use.
2331 TrueOp = 1;
2332 FalseOp = 2;
2333 Cond.push_back(MI.getOperand(3));
2334 Cond.push_back(MI.getOperand(4));
2335 // We can always fold a def.
2336 Optimizable = true;
2337 return false;
2338}
2339
2340MachineInstr *
2341ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2342                                 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2343                                 bool PreferFalse) const {
2344 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2345 "Unknown select instruction");
2346 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2347 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2348 bool Invert = !DefMI;
2349 if (!DefMI)
2350 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2351 if (!DefMI)
2352 return nullptr;
2353
2354 // Find new register class to use.
2355 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2356 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2357 Register DestReg = MI.getOperand(0).getReg();
2358 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2359 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2360 if (!MRI.constrainRegClass(DestReg, FalseClass))
2361 return nullptr;
2362 if (!MRI.constrainRegClass(DestReg, TrueClass))
2363 return nullptr;
2364
2365 // Create a new predicated version of DefMI.
2366 // Rfalse is the first use.
2367 MachineInstrBuilder NewMI =
2368 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2369
2370 // Copy all the DefMI operands, excluding its (null) predicate.
2371 const MCInstrDesc &DefDesc = DefMI->getDesc();
2372 for (unsigned i = 1, e = DefDesc.getNumOperands();
2373 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2374 NewMI.add(DefMI->getOperand(i));
2375
2376 unsigned CondCode = MI.getOperand(3).getImm();
2377 if (Invert)
2378    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2379  else
2380 NewMI.addImm(CondCode);
2381 NewMI.add(MI.getOperand(4));
2382
2383 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2384 if (NewMI->hasOptionalDef())
2385 NewMI.add(condCodeOp());
2386
2387 // The output register value when the predicate is false is an implicit
2388 // register operand tied to the first def.
2389 // The tie makes the register allocator ensure the FalseReg is allocated the
2390 // same register as operand 0.
2391 FalseReg.setImplicit();
2392 NewMI.add(FalseReg);
2393 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2394
2395 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2396 SeenMIs.insert(NewMI);
2397 SeenMIs.erase(DefMI);
2398
2399 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2400  // DefMI would be invalid when transferred inside the loop. Checking for a
2401 // loop is expensive, but at least remove kill flags if they are in different
2402 // BBs.
2403 if (DefMI->getParent() != MI.getParent())
2404 NewMI->clearKillInfo();
2405
2406 // The caller will erase MI, but not DefMI.
2407  DefMI->eraseFromParent();
2408  return NewMI;
2409}
2410
2411/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2412/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2413/// def operand.
2414///
2415/// This will go away once we can teach tblgen how to set the optional CPSR def
2416/// operand itself.
2417struct AddSubFlagsOpcodePair {
2418  uint16_t PseudoOpc;
2419  uint16_t MachineOpc;
2420};
2421
2422static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2423    {ARM::ADDSri, ARM::ADDri},
2424 {ARM::ADDSrr, ARM::ADDrr},
2425 {ARM::ADDSrsi, ARM::ADDrsi},
2426 {ARM::ADDSrsr, ARM::ADDrsr},
2427
2428 {ARM::SUBSri, ARM::SUBri},
2429 {ARM::SUBSrr, ARM::SUBrr},
2430 {ARM::SUBSrsi, ARM::SUBrsi},
2431 {ARM::SUBSrsr, ARM::SUBrsr},
2432
2433 {ARM::RSBSri, ARM::RSBri},
2434 {ARM::RSBSrsi, ARM::RSBrsi},
2435 {ARM::RSBSrsr, ARM::RSBrsr},
2436
2437 {ARM::tADDSi3, ARM::tADDi3},
2438 {ARM::tADDSi8, ARM::tADDi8},
2439 {ARM::tADDSrr, ARM::tADDrr},
2440 {ARM::tADCS, ARM::tADC},
2441
2442 {ARM::tSUBSi3, ARM::tSUBi3},
2443 {ARM::tSUBSi8, ARM::tSUBi8},
2444 {ARM::tSUBSrr, ARM::tSUBrr},
2445 {ARM::tSBCS, ARM::tSBC},
2446 {ARM::tRSBS, ARM::tRSB},
2447 {ARM::tLSLSri, ARM::tLSLri},
2448
2449 {ARM::t2ADDSri, ARM::t2ADDri},
2450 {ARM::t2ADDSrr, ARM::t2ADDrr},
2451 {ARM::t2ADDSrs, ARM::t2ADDrs},
2452
2453 {ARM::t2SUBSri, ARM::t2SUBri},
2454 {ARM::t2SUBSrr, ARM::t2SUBrr},
2455 {ARM::t2SUBSrs, ARM::t2SUBrs},
2456
2457 {ARM::t2RSBSri, ARM::t2RSBri},
2458 {ARM::t2RSBSrs, ARM::t2RSBrs},
2459};
2460
2461unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2462 for (const auto &Entry : AddSubFlagsOpcodeMap)
2463 if (OldOpc == Entry.PseudoOpc)
2464 return Entry.MachineOpc;
2465 return 0;
2466}
2467
2468void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2469                                   MachineBasicBlock::iterator &MBBI,
2470                                   const DebugLoc &dl, Register DestReg,
2471 Register BaseReg, int NumBytes,
2472 ARMCC::CondCodes Pred, Register PredReg,
2473 const ARMBaseInstrInfo &TII,
2474 unsigned MIFlags) {
2475 if (NumBytes == 0 && DestReg != BaseReg) {
2476 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2477 .addReg(BaseReg, RegState::Kill)
2478 .add(predOps(Pred, PredReg))
2479 .add(condCodeOp())
2480 .setMIFlags(MIFlags);
2481 return;
2482 }
2483
2484 bool isSub = NumBytes < 0;
2485 if (isSub) NumBytes = -NumBytes;
2486
2487 while (NumBytes) {
2488 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2489 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2490 assert(ThisVal && "Didn't extract field correctly");
2491
2492 // We will handle these bits from offset, clear them.
2493 NumBytes &= ~ThisVal;
2494
2495 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2496
2497 // Build the new ADD / SUB.
2498 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2499 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2500 .addReg(BaseReg, RegState::Kill)
2501 .addImm(ThisVal)
2502 .add(predOps(Pred, PredReg))
2503 .add(condCodeOp())
2504 .setMIFlags(MIFlags);
2505 BaseReg = DestReg;
2506 }
2507}
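// A minimal standalone sketch of the immediate decomposition used above (not
// part of the LLVM sources): split a constant into chunks that each fit an
// ARM "modified immediate", i.e. an 8-bit value rotated right by an even
// amount, so every chunk can be materialised by one ADDri/SUBri.
//
//   #include <bit>       // std::countr_zero, std::rotl (C++20)
//   #include <cstdint>
//   #include <vector>
//   static std::vector<uint32_t> splitIntoSOImmChunks(uint32_t Bytes) {
//     std::vector<uint32_t> Chunks;
//     while (Bytes) {
//       unsigned Shift = std::countr_zero(Bytes) & ~1u; // rotation must be even
//       Chunks.push_back(Bytes & std::rotl<uint32_t>(0xFF, Shift));
//       Bytes &= ~Chunks.back();
//     }
//     return Chunks;   // e.g. 0x12345 -> {0x45, 0x2300, 0x10000}
//   }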
2508
2509bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2510                                      MachineFunction &MF, MachineInstr *MI,
2511                                      unsigned NumBytes) {
2512 // This optimisation potentially adds lots of load and store
2513 // micro-operations, it's only really a great benefit to code-size.
2514 if (!Subtarget.hasMinSize())
2515 return false;
2516
2517 // If only one register is pushed/popped, LLVM can use an LDR/STR
2518 // instead. We can't modify those so make sure we're dealing with an
2519 // instruction we understand.
2520 bool IsPop = isPopOpcode(MI->getOpcode());
2521 bool IsPush = isPushOpcode(MI->getOpcode());
2522 if (!IsPush && !IsPop)
2523 return false;
2524
2525 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2526 MI->getOpcode() == ARM::VLDMDIA_UPD;
2527 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2528 MI->getOpcode() == ARM::tPOP ||
2529 MI->getOpcode() == ARM::tPOP_RET;
2530
2531 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2532 MI->getOperand(1).getReg() == ARM::SP)) &&
2533 "trying to fold sp update into non-sp-updating push/pop");
2534
2535  // The VFP push & pop act on D-registers, so we can only fold in an
2536  // adjustment that is a multiple of 8 bytes. Similarly, rN is 4 bytes. Don't
2537  // try if this is violated.
2538 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2539 return false;
2540
2541 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2542 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2543 int RegListIdx = IsT1PushPop ? 2 : 4;
2544
2545 // Calculate the space we'll need in terms of registers.
2546 unsigned RegsNeeded;
2547 const TargetRegisterClass *RegClass;
2548 if (IsVFPPushPop) {
2549 RegsNeeded = NumBytes / 8;
2550 RegClass = &ARM::DPRRegClass;
2551 } else {
2552 RegsNeeded = NumBytes / 4;
2553 RegClass = &ARM::GPRRegClass;
2554 }
2555
2556 // We're going to have to strip all list operands off before
2557 // re-adding them since the order matters, so save the existing ones
2558 // for later.
2559  SmallVector<MachineOperand, 4> RegList;
2560
2561 // We're also going to need the first register transferred by this
2562 // instruction, which won't necessarily be the first register in the list.
2563 unsigned FirstRegEnc = -1;
2564
2566 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2567 MachineOperand &MO = MI->getOperand(i);
2568 RegList.push_back(MO);
2569
2570 if (MO.isReg() && !MO.isImplicit() &&
2571 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2572 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2573 }
2574
2575 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2576
2577 // Now try to find enough space in the reglist to allocate NumBytes.
2578 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2579 --CurRegEnc) {
2580 unsigned CurReg = RegClass->getRegister(CurRegEnc);
2581 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2582 continue;
2583 if (!IsPop) {
2584 // Pushing any register is completely harmless, mark the register involved
2585 // as undef since we don't care about its value and must not restore it
2586 // during stack unwinding.
2587 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2588 false, false, true));
2589 --RegsNeeded;
2590 continue;
2591 }
2592
2593 // However, we can only pop an extra register if it's not live. For
2594 // registers live within the function we might clobber a return value
2595 // register; the other way a register can be live here is if it's
2596 // callee-saved.
2597 if (isCalleeSavedRegister(CurReg, CSRegs) ||
2598 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2599            MachineBasicBlock::LQR_Dead) {
2600      // VFP pops don't allow holes in the register list, so any skip is fatal
2601 // for our transformation. GPR pops do, so we should just keep looking.
2602 if (IsVFPPushPop)
2603 return false;
2604 else
2605 continue;
2606 }
2607
2608 // Mark the unimportant registers as <def,dead> in the POP.
2609 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2610 true));
2611 --RegsNeeded;
2612 }
2613
2614 if (RegsNeeded > 0)
2615 return false;
2616
2617 // Finally we know we can profitably perform the optimisation so go
2618 // ahead: strip all existing registers off and add them back again
2619 // in the right order.
2620 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2621 MI->removeOperand(i);
2622
2623 // Add the complete list back in.
2624 MachineInstrBuilder MIB(MF, &*MI);
2625 for (const MachineOperand &MO : llvm::reverse(RegList))
2626 MIB.add(MO);
2627
2628 return true;
2629}
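// Illustrative effect of the folding above (register choice is hypothetical):
// with minsize, a prologue such as
//     push {r4, r5, lr}
//     sub  sp, sp, #8
// can become
//     push {r2, r3, r4, r5, lr}     ; r2/r3 pushed as undef scratch
// and the matching pop can absorb an "add sp, sp, #8" by popping two extra
// registers marked dead, provided those registers are neither live nor
// callee-saved at that point.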
2630
2631bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2632 Register FrameReg, int &Offset,
2633 const ARMBaseInstrInfo &TII) {
2634 unsigned Opcode = MI.getOpcode();
2635 const MCInstrDesc &Desc = MI.getDesc();
2636 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2637 bool isSub = false;
2638
2639 // Memory operands in inline assembly always use AddrMode2.
2640 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2641    AddrMode = ARMII::AddrMode2;
2642
2643 if (Opcode == ARM::ADDri) {
2644 Offset += MI.getOperand(FrameRegIdx+1).getImm();
2645 if (Offset == 0) {
2646 // Turn it into a move.
2647 MI.setDesc(TII.get(ARM::MOVr));
2648 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2649 MI.removeOperand(FrameRegIdx+1);
2650 Offset = 0;
2651 return true;
2652 } else if (Offset < 0) {
2653 Offset = -Offset;
2654 isSub = true;
2655 MI.setDesc(TII.get(ARM::SUBri));
2656 }
2657
2658 // Common case: small offset, fits into instruction.
2659 if (ARM_AM::getSOImmVal(Offset) != -1) {
2660 // Replace the FrameIndex with sp / fp
2661 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2662 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2663 Offset = 0;
2664 return true;
2665 }
2666
2667    // Otherwise, pull as much of the immediate into this ADDri/SUBri
2668 // as possible.
2669 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2670 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
2671
2672 // We will handle these bits from offset, clear them.
2673 Offset &= ~ThisImmVal;
2674
2675 // Get the properly encoded SOImmVal field.
2676 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2677 "Bit extraction didn't work?");
2678 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2679 } else {
2680 unsigned ImmIdx = 0;
2681 int InstrOffs = 0;
2682 unsigned NumBits = 0;
2683 unsigned Scale = 1;
2684 switch (AddrMode) {
2685    case ARMII::AddrMode_i12:
2686      ImmIdx = FrameRegIdx + 1;
2687 InstrOffs = MI.getOperand(ImmIdx).getImm();
2688 NumBits = 12;
2689 break;
2690 case ARMII::AddrMode2:
2691 ImmIdx = FrameRegIdx+2;
2692 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2693 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2694 InstrOffs *= -1;
2695 NumBits = 12;
2696 break;
2697 case ARMII::AddrMode3:
2698 ImmIdx = FrameRegIdx+2;
2699 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2700 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2701 InstrOffs *= -1;
2702 NumBits = 8;
2703 break;
2704 case ARMII::AddrMode4:
2705 case ARMII::AddrMode6:
2706 // Can't fold any offset even if it's zero.
2707 return false;
2708 case ARMII::AddrMode5:
2709 ImmIdx = FrameRegIdx+1;
2710 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2711 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2712 InstrOffs *= -1;
2713 NumBits = 8;
2714 Scale = 4;
2715 break;
2716    case ARMII::AddrMode5FP16:
2717      ImmIdx = FrameRegIdx+1;
2718 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2719 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2720 InstrOffs *= -1;
2721 NumBits = 8;
2722 Scale = 2;
2723 break;
2727 ImmIdx = FrameRegIdx+1;
2728 InstrOffs = MI.getOperand(ImmIdx).getImm();
2729 NumBits = 7;
2730 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2731 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2732 break;
2733 default:
2734 llvm_unreachable("Unsupported addressing mode!");
2735 }
2736
2737 Offset += InstrOffs * Scale;
2738 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2739 if (Offset < 0) {
2740 Offset = -Offset;
2741 isSub = true;
2742 }
2743
2744 // Attempt to fold address comp. if opcode has offset bits
2745 if (NumBits > 0) {
2746 // Common case: small offset, fits into instruction.
2747 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2748 int ImmedOffset = Offset / Scale;
2749 unsigned Mask = (1 << NumBits) - 1;
2750 if ((unsigned)Offset <= Mask * Scale) {
2751 // Replace the FrameIndex with sp
2752 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2753 // FIXME: When addrmode2 goes away, this will simplify (like the
2754 // T2 version), as the LDR.i12 versions don't need the encoding
2755 // tricks for the offset value.
2756 if (isSub) {
2757          if (AddrMode == ARMII::AddrMode_i12)
2758            ImmedOffset = -ImmedOffset;
2759 else
2760 ImmedOffset |= 1 << NumBits;
2761 }
2762 ImmOp.ChangeToImmediate(ImmedOffset);
2763 Offset = 0;
2764 return true;
2765 }
2766
2767 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2768 ImmedOffset = ImmedOffset & Mask;
2769 if (isSub) {
2770        if (AddrMode == ARMII::AddrMode_i12)
2771          ImmedOffset = -ImmedOffset;
2772 else
2773 ImmedOffset |= 1 << NumBits;
2774 }
2775 ImmOp.ChangeToImmediate(ImmedOffset);
2776 Offset &= ~(Mask*Scale);
2777 }
2778 }
2779
2780 Offset = (isSub) ? -Offset : Offset;
2781 return Offset == 0;
2782}
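// A minimal standalone sketch of the offset-folding check above (not part of
// the LLVM sources): an addressing mode with NumBits of immediate and a Scale
// can absorb Scale-aligned offsets up to ((1 << NumBits) - 1) * Scale; anything
// beyond that is left in Offset for a separate add/sub of the base register.
//
//   static bool fitsInAddrMode(unsigned Offset, unsigned NumBits,
//                              unsigned Scale) {
//     unsigned Mask = (1u << NumBits) - 1;
//     return Offset % Scale == 0 && Offset <= Mask * Scale;
//   }
//   // e.g. AddrMode5 (VLDR/VSTR): NumBits = 8, Scale = 4 -> at most 1020 bytes.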
2783
2784/// analyzeCompare - For a comparison instruction, return the source registers
2785/// in SrcReg and SrcReg2 if having two register operands, and the value it
2786/// compares against in CmpValue. Return true if the comparison instruction
2787/// can be analyzed.
2788bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2789                                      Register &SrcReg2, int64_t &CmpMask,
2790 int64_t &CmpValue) const {
2791 switch (MI.getOpcode()) {
2792 default: break;
2793 case ARM::CMPri:
2794 case ARM::t2CMPri:
2795 case ARM::tCMPi8:
2796 SrcReg = MI.getOperand(0).getReg();
2797 SrcReg2 = 0;
2798 CmpMask = ~0;
2799 CmpValue = MI.getOperand(1).getImm();
2800 return true;
2801 case ARM::CMPrr:
2802 case ARM::t2CMPrr:
2803 case ARM::tCMPr:
2804 SrcReg = MI.getOperand(0).getReg();
2805 SrcReg2 = MI.getOperand(1).getReg();
2806 CmpMask = ~0;
2807 CmpValue = 0;
2808 return true;
2809 case ARM::TSTri:
2810 case ARM::t2TSTri:
2811 SrcReg = MI.getOperand(0).getReg();
2812 SrcReg2 = 0;
2813 CmpMask = MI.getOperand(1).getImm();
2814 CmpValue = 0;
2815 return true;
2816 }
2817
2818 return false;
2819}
2820
2821/// isSuitableForMask - Identify a suitable 'and' instruction that
2822/// operates on the given source register and applies the same mask
2823/// as a 'tst' instruction. Provide a limited look-through for copies.
2824/// When successful, MI will hold the found instruction.
2825static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2826                              int CmpMask, bool CommonUse) {
2827 switch (MI->getOpcode()) {
2828 case ARM::ANDri:
2829 case ARM::t2ANDri:
2830 if (CmpMask != MI->getOperand(2).getImm())
2831 return false;
2832 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2833 return true;
2834 break;
2835 }
2836
2837 return false;
2838}
2839
2840/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2841/// the condition code if we modify the instructions such that flags are
2842/// set by ADD(a,b,X).
2843inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2844  switch (CC) {
2845 default: return ARMCC::AL;
2846 case ARMCC::HS: return ARMCC::LO;
2847 case ARMCC::LO: return ARMCC::HS;
2848 case ARMCC::VS: return ARMCC::VS;
2849 case ARMCC::VC: return ARMCC::VC;
2850 }
2851}
2852
2853/// isRedundantFlagInstr - check whether the first instruction, whose only
2854/// purpose is to update flags, can be made redundant.
2855/// CMPrr can be made redundant by SUBrr if the operands are the same.
2856/// CMPri can be made redundant by SUBri if the operands are the same.
2857/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2858/// This function can be extended later on.
2859inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2860 Register SrcReg, Register SrcReg2,
2861 int64_t ImmValue,
2862 const MachineInstr *OI,
2863 bool &IsThumb1) {
2864 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2865 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2866 ((OI->getOperand(1).getReg() == SrcReg &&
2867 OI->getOperand(2).getReg() == SrcReg2) ||
2868 (OI->getOperand(1).getReg() == SrcReg2 &&
2869 OI->getOperand(2).getReg() == SrcReg))) {
2870 IsThumb1 = false;
2871 return true;
2872 }
2873
2874 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2875 ((OI->getOperand(2).getReg() == SrcReg &&
2876 OI->getOperand(3).getReg() == SrcReg2) ||
2877 (OI->getOperand(2).getReg() == SrcReg2 &&
2878 OI->getOperand(3).getReg() == SrcReg))) {
2879 IsThumb1 = true;
2880 return true;
2881 }
2882
2883 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2884 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2885 OI->getOperand(1).getReg() == SrcReg &&
2886 OI->getOperand(2).getImm() == ImmValue) {
2887 IsThumb1 = false;
2888 return true;
2889 }
2890
2891 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2892 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2893 OI->getOperand(2).getReg() == SrcReg &&
2894 OI->getOperand(3).getImm() == ImmValue) {
2895 IsThumb1 = true;
2896 return true;
2897 }
2898
2899 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2900 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2901 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2902 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2903 OI->getOperand(0).getReg() == SrcReg &&
2904 OI->getOperand(1).getReg() == SrcReg2) {
2905 IsThumb1 = false;
2906 return true;
2907 }
2908
2909 if (CmpI->getOpcode() == ARM::tCMPr &&
2910 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2911 OI->getOpcode() == ARM::tADDrr) &&
2912 OI->getOperand(0).getReg() == SrcReg &&
2913 OI->getOperand(2).getReg() == SrcReg2) {
2914 IsThumb1 = true;
2915 return true;
2916 }
2917
2918 return false;
2919}
2920
2921static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2922 switch (MI->getOpcode()) {
2923 default: return false;
2924 case ARM::tLSLri:
2925 case ARM::tLSRri:
2926 case ARM::tLSLrr:
2927 case ARM::tLSRrr:
2928 case ARM::tSUBrr:
2929 case ARM::tADDrr:
2930 case ARM::tADDi3:
2931 case ARM::tADDi8:
2932 case ARM::tSUBi3:
2933 case ARM::tSUBi8:
2934 case ARM::tMUL:
2935 case ARM::tADC:
2936 case ARM::tSBC:
2937 case ARM::tRSB:
2938 case ARM::tAND:
2939 case ARM::tORR:
2940 case ARM::tEOR:
2941 case ARM::tBIC:
2942 case ARM::tMVN:
2943 case ARM::tASRri:
2944 case ARM::tASRrr:
2945 case ARM::tROR:
2946 IsThumb1 = true;
2947 [[fallthrough]];
2948 case ARM::RSBrr:
2949 case ARM::RSBri:
2950 case ARM::RSCrr:
2951 case ARM::RSCri:
2952 case ARM::ADDrr:
2953 case ARM::ADDri:
2954 case ARM::ADCrr:
2955 case ARM::ADCri:
2956 case ARM::SUBrr:
2957 case ARM::SUBri:
2958 case ARM::SBCrr:
2959 case ARM::SBCri:
2960 case ARM::t2RSBri:
2961 case ARM::t2ADDrr:
2962 case ARM::t2ADDri:
2963 case ARM::t2ADCrr:
2964 case ARM::t2ADCri:
2965 case ARM::t2SUBrr:
2966 case ARM::t2SUBri:
2967 case ARM::t2SBCrr:
2968 case ARM::t2SBCri:
2969 case ARM::ANDrr:
2970 case ARM::ANDri:
2971 case ARM::ANDrsr:
2972 case ARM::ANDrsi:
2973 case ARM::t2ANDrr:
2974 case ARM::t2ANDri:
2975 case ARM::t2ANDrs:
2976 case ARM::ORRrr:
2977 case ARM::ORRri:
2978 case ARM::ORRrsr:
2979 case ARM::ORRrsi:
2980 case ARM::t2ORRrr:
2981 case ARM::t2ORRri:
2982 case ARM::t2ORRrs:
2983 case ARM::EORrr:
2984 case ARM::EORri:
2985 case ARM::EORrsr:
2986 case ARM::EORrsi:
2987 case ARM::t2EORrr:
2988 case ARM::t2EORri:
2989 case ARM::t2EORrs:
2990 case ARM::BICri:
2991 case ARM::BICrr:
2992 case ARM::BICrsi:
2993 case ARM::BICrsr:
2994 case ARM::t2BICri:
2995 case ARM::t2BICrr:
2996 case ARM::t2BICrs:
2997 case ARM::t2LSRri:
2998 case ARM::t2LSRrr:
2999 case ARM::t2LSLri:
3000 case ARM::t2LSLrr:
3001 case ARM::MOVsr:
3002 case ARM::MOVsi:
3003 return true;
3004 }
3005}
3006
3007/// optimizeCompareInstr - Convert the instruction supplying the argument to the
3008/// comparison into one that sets the zero bit in the flags register;
3009/// Remove a redundant Compare instruction if an earlier instruction can set the
3010/// flags in the same way as Compare.
3011/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
3012/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
3013/// condition code of instructions which use the flags.
3014bool ARMBaseInstrInfo::optimizeCompareInstr(
3015    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
3016 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
3017 // Get the unique definition of SrcReg.
3018 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3019 if (!MI) return false;
3020
3021 // Masked compares sometimes use the same register as the corresponding 'and'.
3022 if (CmpMask != ~0) {
3023 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
3024 MI = nullptr;
3025    for (MachineRegisterInfo::use_instr_iterator
3026         UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
3027 UI != UE; ++UI) {
3028 if (UI->getParent() != CmpInstr.getParent())
3029 continue;
3030 MachineInstr *PotentialAND = &*UI;
3031 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
3032 isPredicated(*PotentialAND))
3033 continue;
3034 MI = PotentialAND;
3035 break;
3036 }
3037 if (!MI) return false;
3038 }
3039 }
3040
3041 // Get ready to iterate backward from CmpInstr.
3042 MachineBasicBlock::iterator I = CmpInstr, E = MI,
3043 B = CmpInstr.getParent()->begin();
3044
3045 // Early exit if CmpInstr is at the beginning of the BB.
3046 if (I == B) return false;
3047
3048 // There are two possible candidates which can be changed to set CPSR:
3049 // One is MI, the other is a SUB or ADD instruction.
3050 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
3051 // ADDr[ri](r1, r2, X).
3052 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
3053 MachineInstr *SubAdd = nullptr;
3054 if (SrcReg2 != 0)
3055 // MI is not a candidate for CMPrr.
3056 MI = nullptr;
3057 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
3058 // Conservatively refuse to convert an instruction which isn't in the same
3059 // BB as the comparison.
3060 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
3061 // Thus we cannot return here.
3062 if (CmpInstr.getOpcode() == ARM::CMPri ||
3063 CmpInstr.getOpcode() == ARM::t2CMPri ||
3064 CmpInstr.getOpcode() == ARM::tCMPi8)
3065 MI = nullptr;
3066 else
3067 return false;
3068 }
3069
3070 bool IsThumb1 = false;
3071 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
3072 return false;
3073
3074 // We also want to do this peephole for cases like this: if (a*b == 0),
3075 // and optimise away the CMP instruction from the generated code sequence:
3076 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
3077 // resulting from the select instruction, but these MOVS instructions for
3078 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
3079 // However, if we only have MOVS instructions in between the CMP and the
3080 // other instruction (the MULS in this example), then the CPSR is dead so we
3081 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
3082 // reordering and then continue the analysis hoping we can eliminate the
3083 // CMP. This peephole works on the vregs, so is still in SSA form. As a
3084 // consequence, the movs won't redefine/kill the MUL operands which would
3085 // make this reordering illegal.
3086  const TargetRegisterInfo *TRI = &getRegisterInfo();
3087  if (MI && IsThumb1) {
3088 --I;
3089 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
3090 bool CanReorder = true;
3091 for (; I != E; --I) {
3092 if (I->getOpcode() != ARM::tMOVi8) {
3093 CanReorder = false;
3094 break;
3095 }
3096 }
3097 if (CanReorder) {
3098 MI = MI->removeFromParent();
3099 E = CmpInstr;
3100 CmpInstr.getParent()->insert(E, MI);
3101 }
3102 }
3103 I = CmpInstr;
3104 E = MI;
3105 }
3106
3107 // Check that CPSR isn't set between the comparison instruction and the one we
3108 // want to change. At the same time, search for SubAdd.
3109 bool SubAddIsThumb1 = false;
3110 do {
3111 const MachineInstr &Instr = *--I;
3112
3113 // Check whether CmpInstr can be made redundant by the current instruction.
3114 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
3115 SubAddIsThumb1)) {
3116 SubAdd = &*I;
3117 break;
3118 }
3119
3120 // Allow E (which was initially MI) to be SubAdd but do not search before E.
3121 if (I == E)
3122 break;
3123
3124 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
3125 Instr.readsRegister(ARM::CPSR, TRI))
3126 // This instruction modifies or uses CPSR after the one we want to
3127 // change. We can't do this transformation.
3128 return false;
3129
3130 if (I == B) {
3131 // In some cases, we scan the use-list of an instruction for an AND;
3132 // that AND is in the same BB, but may not be scheduled before the
3133 // corresponding TST. In that case, bail out.
3134 //
3135 // FIXME: We could try to reschedule the AND.
3136 return false;
3137 }
3138 } while (true);
3139
3140 // Return false if no candidates exist.
3141 if (!MI && !SubAdd)
3142 return false;
3143
3144 // If we found a SubAdd, use it as it will be closer to the CMP
3145 if (SubAdd) {
3146 MI = SubAdd;
3147 IsThumb1 = SubAddIsThumb1;
3148 }
3149
3150 // We can't use a predicated instruction - it doesn't always write the flags.
3151 if (isPredicated(*MI))
3152 return false;
3153
3154 // Scan forward for the use of CPSR
3155 // When checking against MI: if it's a conditional code that requires
3156 // checking of the V bit or C bit, then this is not safe to do.
3157 // It is safe to remove CmpInstr if CPSR is redefined or killed.
3158 // If we are done with the basic block, we need to check whether CPSR is
3159 // live-out.
3160  SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3161      OperandsToUpdate;
3162 bool isSafe = false;
3163 I = CmpInstr;
3164 E = CmpInstr.getParent()->end();
3165 while (!isSafe && ++I != E) {
3166 const MachineInstr &Instr = *I;
3167 for (unsigned IO = 0, EO = Instr.getNumOperands();
3168 !isSafe && IO != EO; ++IO) {
3169 const MachineOperand &MO = Instr.getOperand(IO);
3170 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3171 isSafe = true;
3172 break;
3173 }
3174 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3175 continue;
3176 if (MO.isDef()) {
3177 isSafe = true;
3178 break;
3179 }
3180 // Condition code is after the operand before CPSR except for VSELs.
3181      ARMCC::CondCodes CC = ARMCC::AL;
3182      bool IsInstrVSel = true;
3183 switch (Instr.getOpcode()) {
3184 default:
3185 IsInstrVSel = false;
3186 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3187 break;
3188 case ARM::VSELEQD:
3189 case ARM::VSELEQS:
3190 case ARM::VSELEQH:
3191 CC = ARMCC::EQ;
3192 break;
3193 case ARM::VSELGTD:
3194 case ARM::VSELGTS:
3195 case ARM::VSELGTH:
3196 CC = ARMCC::GT;
3197 break;
3198 case ARM::VSELGED:
3199 case ARM::VSELGES:
3200 case ARM::VSELGEH:
3201 CC = ARMCC::GE;
3202 break;
3203 case ARM::VSELVSD:
3204 case ARM::VSELVSS:
3205 case ARM::VSELVSH:
3206 CC = ARMCC::VS;
3207 break;
3208 }
3209
3210 if (SubAdd) {
3211 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3212 // on CMP needs to be updated to be based on SUB.
3213 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3214 // needs to be modified.
3215 // Push the condition code operands to OperandsToUpdate.
3216 // If it is safe to remove CmpInstr, the condition code of these
3217 // operands will be modified.
3218 unsigned Opc = SubAdd->getOpcode();
3219 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3220 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3221 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3222 Opc == ARM::tSUBi8;
3223 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3224 if (!IsSub ||
3225 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3226 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3227 // VSel doesn't support condition code update.
3228 if (IsInstrVSel)
3229 return false;
3230 // Ensure we can swap the condition.
3231          ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3232          if (NewCC == ARMCC::AL)
3233 return false;
3234 OperandsToUpdate.push_back(
3235 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3236 }
3237 } else {
3238 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3239 switch (CC) {
3240 case ARMCC::EQ: // Z
3241 case ARMCC::NE: // Z
3242 case ARMCC::MI: // N
3243 case ARMCC::PL: // N
3244 case ARMCC::AL: // none
3245 // CPSR can be used multiple times, we should continue.
3246 break;
3247 case ARMCC::HS: // C
3248 case ARMCC::LO: // C
3249 case ARMCC::VS: // V
3250 case ARMCC::VC: // V
3251 case ARMCC::HI: // C Z
3252 case ARMCC::LS: // C Z
3253 case ARMCC::GE: // N V
3254 case ARMCC::LT: // N V
3255 case ARMCC::GT: // Z N V
3256 case ARMCC::LE: // Z N V
3257 // The instruction uses the V bit or C bit which is not safe.
3258 return false;
3259 }
3260 }
3261 }
3262 }
3263
3264 // If CPSR is not killed nor re-defined, we should check whether it is
3265 // live-out. If it is live-out, do not optimize.
3266 if (!isSafe) {
3267 MachineBasicBlock *MBB = CmpInstr.getParent();
3268 for (MachineBasicBlock *Succ : MBB->successors())
3269 if (Succ->isLiveIn(ARM::CPSR))
3270 return false;
3271 }
3272
3273 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3274 // set CPSR so this is represented as an explicit output)
3275 if (!IsThumb1) {
3276 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3277 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3278 MI->getOperand(CPSRRegNum).setIsDef(true);
3279 }
3280 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3281 CmpInstr.eraseFromParent();
3282
3283 // Modify the condition code of operands in OperandsToUpdate.
3284 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3285 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3286 for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
3287 OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3288
3289 MI->clearRegisterDeads(ARM::CPSR);
3290
3291 return true;
3292}
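// Illustrative effect of the transformation above (virtual register numbers
// are hypothetical): given
//     %3 = SUBrr %1, %2, 14, $noreg, $noreg   ; optional cc_out unused
//     CMPrr %1, %2, implicit-def $cpsr
//     Bcc %bb.1, 0 /* eq */, $cpsr
// the optional cc_out of the SUBrr is switched to a $cpsr def (it now prints
// as "subs") and the CMPrr is erased. If the SUB's operands are swapped
// relative to the CMP, or the flag-setter is an ADD, the condition codes on
// the CPSR users are first rewritten via getSwappedCondition /
// getCmpToAddCondition.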
3293
3294bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3295  // Do not sink MI if it might be used to optimize a redundant compare.
3296 // We heuristically only look at the instruction immediately following MI to
3297 // avoid potentially searching the entire basic block.
3298 if (isPredicated(MI))
3299 return true;
3300  MachineBasicBlock::const_iterator Next = &MI;
3301  ++Next;
3302 Register SrcReg, SrcReg2;
3303 int64_t CmpMask, CmpValue;
3304 bool IsThumb1;
3305 if (Next != MI.getParent()->end() &&
3306 analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3307 isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3308 return false;
3309 return true;
3310}
3311
3312bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3313                                     Register Reg,
3314 MachineRegisterInfo *MRI) const {
3315 // Fold large immediates into add, sub, or, xor.
3316 unsigned DefOpc = DefMI.getOpcode();
3317 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
3318 DefOpc != ARM::tMOVi32imm)
3319 return false;
3320 if (!DefMI.getOperand(1).isImm())
3321 // Could be t2MOVi32imm @xx
3322 return false;
3323
3324 if (!MRI->hasOneNonDBGUse(Reg))
3325 return false;
3326
3327 const MCInstrDesc &DefMCID = DefMI.getDesc();
3328 if (DefMCID.hasOptionalDef()) {
3329 unsigned NumOps = DefMCID.getNumOperands();
3330 const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3331 if (MO.getReg() == ARM::CPSR && !MO.isDead())
3332 // If DefMI defines CPSR and it is not dead, it's obviously not safe
3333 // to delete DefMI.
3334 return false;
3335 }
3336
3337 const MCInstrDesc &UseMCID = UseMI.getDesc();
3338 if (UseMCID.hasOptionalDef()) {
3339 unsigned NumOps = UseMCID.getNumOperands();
3340 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3341 // If the instruction sets the flag, do not attempt this optimization
3342 // since it may change the semantics of the code.
3343 return false;
3344 }
3345
3346 unsigned UseOpc = UseMI.getOpcode();
3347 unsigned NewUseOpc = 0;
3348 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3349 uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3350 bool Commute = false;
3351 switch (UseOpc) {
3352 default: return false;
3353 case ARM::SUBrr:
3354 case ARM::ADDrr:
3355 case ARM::ORRrr:
3356 case ARM::EORrr:
3357 case ARM::t2SUBrr:
3358 case ARM::t2ADDrr:
3359 case ARM::t2ORRrr:
3360 case ARM::t2EORrr: {
3361 Commute = UseMI.getOperand(2).getReg() != Reg;
3362 switch (UseOpc) {
3363 default: break;
3364 case ARM::ADDrr:
3365 case ARM::SUBrr:
3366 if (UseOpc == ARM::SUBrr && Commute)
3367 return false;
3368
3369 // ADD/SUB are special because they're essentially the same operation, so
3370 // we can handle a larger range of immediates.
3371 if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3372 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3373 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3374 ImmVal = -ImmVal;
3375 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3376 } else
3377 return false;
3378 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3379 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3380 break;
3381 case ARM::ORRrr:
3382 case ARM::EORrr:
3383 if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3384 return false;
3385 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3386 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3387 switch (UseOpc) {
3388 default: break;
3389 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3390 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3391 }
3392 break;
3393 case ARM::t2ADDrr:
3394 case ARM::t2SUBrr: {
3395 if (UseOpc == ARM::t2SUBrr && Commute)
3396 return false;
3397
3398 // ADD/SUB are special because they're essentially the same operation, so
3399 // we can handle a larger range of immediates.
3400 const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3401 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3402 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3403 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3404 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3405 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3406 ImmVal = -ImmVal;
3407 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3408 } else
3409 return false;
3410 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3411 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3412 break;
3413 }
3414 case ARM::t2ORRrr:
3415 case ARM::t2EORrr:
3416 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3417 return false;
3418 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3419 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3420 switch (UseOpc) {
3421 default: break;
3422 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3423 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3424 }
3425 break;
3426 }
3427 }
3428 }
3429
3430 unsigned OpIdx = Commute ? 2 : 1;
3431 Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3432 bool isKill = UseMI.getOperand(OpIdx).isKill();
3433 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3434 Register NewReg = MRI->createVirtualRegister(TRC);
3435 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3436 NewReg)
3437 .addReg(Reg1, getKillRegState(isKill))
3438 .addImm(SOImmValV1)
3439      .add(predOps(ARMCC::AL))
3440      .add(condCodeOp());
3441 UseMI.setDesc(get(NewUseOpc));
3442 UseMI.getOperand(1).setReg(NewReg);
3443 UseMI.getOperand(1).setIsKill();
3444 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3445 DefMI.eraseFromParent();
3446  // FIXME: t2ADDrr should be split, as different rules apply when writing to SP.
3447 // Just as t2ADDri, that was split to [t2ADDri, t2ADDspImm].
3448 // Then the below code will not be needed, as the input/output register
3449 // classes will be rgpr or gprSP.
3450 // For now, we fix the UseMI operand explicitly here:
3451 switch(NewUseOpc){
3452 case ARM::t2ADDspImm:
3453 case ARM::t2SUBspImm:
3454 case ARM::t2ADDri:
3455 case ARM::t2SUBri:
3456 MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3457 }
3458 return true;
3459}
3460
3461static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3462 const MachineInstr &MI) {
3463 switch (MI.getOpcode()) {
3464 default: {
3465 const MCInstrDesc &Desc = MI.getDesc();
3466 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3467 assert(UOps >= 0 && "bad # UOps");
3468 return UOps;
3469 }
3470
3471 case ARM::LDRrs:
3472 case ARM::LDRBrs:
3473 case ARM::STRrs:
3474 case ARM::STRBrs: {
3475 unsigned ShOpVal = MI.getOperand(3).getImm();
3476 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3477 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3478 if (!isSub &&
3479 (ShImm == 0 ||
3480 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3481 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3482 return 1;
3483 return 2;
3484 }
3485
3486 case ARM::LDRH:
3487 case ARM::STRH: {
3488 if (!MI.getOperand(2).getReg())
3489 return 1;
3490
3491 unsigned ShOpVal = MI.getOperand(3).getImm();
3492 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3493 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3494 if (!isSub &&
3495 (ShImm == 0 ||
3496 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3497 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3498 return 1;
3499 return 2;
3500 }
3501
3502 case ARM::LDRSB:
3503 case ARM::LDRSH:
3504 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3505
3506 case ARM::LDRSB_POST:
3507 case ARM::LDRSH_POST: {
3508 Register Rt = MI.getOperand(0).getReg();
3509 Register Rm = MI.getOperand(3).getReg();
3510 return (Rt == Rm) ? 4 : 3;
3511 }
3512
3513 case ARM::LDR_PRE_REG:
3514 case ARM::LDRB_PRE_REG: {
3515 Register Rt = MI.getOperand(0).getReg();
3516 Register Rm = MI.getOperand(3).getReg();
3517 if (Rt == Rm)
3518 return 3;
3519 unsigned ShOpVal = MI.getOperand(4).getImm();
3520 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3521 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3522 if (!isSub &&
3523 (ShImm == 0 ||
3524 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3525 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3526 return 2;
3527 return 3;
3528 }
3529
3530 case ARM::STR_PRE_REG:
3531 case ARM::STRB_PRE_REG: {
3532 unsigned ShOpVal = MI.getOperand(4).getImm();
3533 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3534 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3535 if (!isSub &&
3536 (ShImm == 0 ||
3537 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3538 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3539 return 2;
3540 return 3;
3541 }
3542
3543 case ARM::LDRH_PRE:
3544 case ARM::STRH_PRE: {
3545 Register Rt = MI.getOperand(0).getReg();
3546 Register Rm = MI.getOperand(3).getReg();
3547 if (!Rm)
3548 return 2;
3549 if (Rt == Rm)
3550 return 3;
3551 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3552 }
3553
3554 case ARM::LDR_POST_REG:
3555 case ARM::LDRB_POST_REG:
3556 case ARM::LDRH_POST: {
3557 Register Rt = MI.getOperand(0).getReg();
3558 Register Rm = MI.getOperand(3).getReg();
3559 return (Rt == Rm) ? 3 : 2;
3560 }
3561
3562 case ARM::LDR_PRE_IMM:
3563 case ARM::LDRB_PRE_IMM:
3564 case ARM::LDR_POST_IMM:
3565 case ARM::LDRB_POST_IMM:
3566 case ARM::STRB_POST_IMM:
3567 case ARM::STRB_POST_REG:
3568 case ARM::STRB_PRE_IMM:
3569 case ARM::STRH_POST:
3570 case ARM::STR_POST_IMM:
3571 case ARM::STR_POST_REG:
3572 case ARM::STR_PRE_IMM:
3573 return 2;
3574
3575 case ARM::LDRSB_PRE:
3576 case ARM::LDRSH_PRE: {
3577 Register Rm = MI.getOperand(3).getReg();
3578 if (Rm == 0)
3579 return 3;
3580 Register Rt = MI.getOperand(0).getReg();
3581 if (Rt == Rm)
3582 return 4;
3583 unsigned ShOpVal = MI.getOperand(4).getImm();
3584 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3585 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3586 if (!isSub &&
3587 (ShImm == 0 ||
3588 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3589 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3590 return 3;
3591 return 4;
3592 }
3593
3594 case ARM::LDRD: {
3595 Register Rt = MI.getOperand(0).getReg();
3596 Register Rn = MI.getOperand(2).getReg();
3597 Register Rm = MI.getOperand(3).getReg();
3598 if (Rm)
3599 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3600 : 3;
3601 return (Rt == Rn) ? 3 : 2;
3602 }
3603
3604 case ARM::STRD: {
3605 Register Rm = MI.getOperand(3).getReg();
3606 if (Rm)
3607 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3608 : 3;
3609 return 2;
3610 }
3611
3612 case ARM::LDRD_POST:
3613 case ARM::t2LDRD_POST:
3614 return 3;
3615
3616 case ARM::STRD_POST:
3617 case ARM::t2STRD_POST:
3618 return 4;
3619
3620 case ARM::LDRD_PRE: {
3621 Register Rt = MI.getOperand(0).getReg();
3622 Register Rn = MI.getOperand(3).getReg();
3623 Register Rm = MI.getOperand(4).getReg();
3624 if (Rm)
3625 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3626 : 4;
3627 return (Rt == Rn) ? 4 : 3;
3628 }
3629
3630 case ARM::t2LDRD_PRE: {
3631 Register Rt = MI.getOperand(0).getReg();
3632 Register Rn = MI.getOperand(3).getReg();
3633 return (Rt == Rn) ? 4 : 3;
3634 }
3635
3636 case ARM::STRD_PRE: {
3637 Register Rm = MI.getOperand(4).getReg();
3638 if (Rm)
3639 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3640 : 4;
3641 return 3;
3642 }
3643
3644 case ARM::t2STRD_PRE:
3645 return 3;
3646
3647 case ARM::t2LDR_POST:
3648 case ARM::t2LDRB_POST:
3649 case ARM::t2LDRB_PRE:
3650 case ARM::t2LDRSBi12:
3651 case ARM::t2LDRSBi8:
3652 case ARM::t2LDRSBpci:
3653 case ARM::t2LDRSBs:
3654 case ARM::t2LDRH_POST:
3655 case ARM::t2LDRH_PRE:
3656 case ARM::t2LDRSBT:
3657 case ARM::t2LDRSB_POST:
3658 case ARM::t2LDRSB_PRE:
3659 case ARM::t2LDRSH_POST:
3660 case ARM::t2LDRSH_PRE:
3661 case ARM::t2LDRSHi12:
3662 case ARM::t2LDRSHi8:
3663 case ARM::t2LDRSHpci:
3664 case ARM::t2LDRSHs:
3665 return 2;
3666
3667 case ARM::t2LDRDi8: {
3668 Register Rt = MI.getOperand(0).getReg();
3669 Register Rn = MI.getOperand(2).getReg();
3670 return (Rt == Rn) ? 3 : 2;
3671 }
3672
3673 case ARM::t2STRB_POST:
3674 case ARM::t2STRB_PRE:
3675 case ARM::t2STRBs:
3676 case ARM::t2STRDi8:
3677 case ARM::t2STRH_POST:
3678 case ARM::t2STRH_PRE:
3679 case ARM::t2STRHs:
3680 case ARM::t2STR_POST:
3681 case ARM::t2STR_PRE:
3682 case ARM::t2STRs:
3683 return 2;
3684 }
3685}
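// For illustration, following the checks above: on Swift an LDRrs whose
// address is [Rn, Rm] or [Rn, Rm, lsl #1..#3] with an added index register is
// modelled as a single micro-op, while a subtracted index (e.g. [Rn, -Rm]) or
// any other shift (e.g. [Rn, Rm, lsr #2]) costs two.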
3686
3687// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3688// can't be easily determined, return 0 (missing MachineMemOperand).
3689//
3690// FIXME: The current MachineInstr design does not support relying on machine
3691// mem operands to determine the width of a memory access. Instead, we expect
3692// the target to provide this information based on the instruction opcode and
3693// operands. However, using MachineMemOperand is the best solution now for
3694// two reasons:
3695//
3696// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3697// operands. This is much more dangerous than using the MachineMemOperand
3698// sizes because CodeGen passes can insert/remove optional machine operands. In
3699// fact, it's totally incorrect for preRA passes and appears to be wrong for
3700// postRA passes as well.
3701//
3702// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3703// machine model that calls this should handle the unknown (zero size) case.
3704//
3705// Long term, we should require a target hook that verifies MachineMemOperand
3706// sizes during MC lowering. That target hook should be local to MC lowering
3707// because we can't ensure that it is aware of other MI forms. Doing this will
3708// ensure that MachineMemOperands are correctly propagated through all passes.
3709unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3710 unsigned Size = 0;
3711 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3712 E = MI.memoperands_end();
3713 I != E; ++I) {
3714 Size += (*I)->getSize().getValue();
3715 }
3716 // FIXME: The scheduler currently can't handle values larger than 16. But
3717 // the values can actually go up to 32 for floating-point load/store
3718 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3719 // operations isn't right; we could end up with "extra" memory operands for
3720 // various reasons, like tail merge merging two memory operations.
3721 return std::min(Size / 4, 16U);
3722}
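// For illustration: an LDMIA carrying three 4-byte MachineMemOperands yields
// min(12 / 4, 16) = 3 addresses; a VLDMIA of sixteen D registers (128 bytes of
// memoperands) would compute 32 words but is clamped to 16; and a load / store
// multiple with no memoperands attached reports 0, i.e. unknown.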
3723
3724static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3725 unsigned NumRegs) {
3726 unsigned UOps = 1 + NumRegs; // 1 for address computation.
3727 switch (Opc) {
3728 default:
3729 break;
3730 case ARM::VLDMDIA_UPD:
3731 case ARM::VLDMDDB_UPD:
3732 case ARM::VLDMSIA_UPD:
3733 case ARM::VLDMSDB_UPD:
3734 case ARM::VSTMDIA_UPD:
3735 case ARM::VSTMDDB_UPD:
3736 case ARM::VSTMSIA_UPD:
3737 case ARM::VSTMSDB_UPD:
3738 case ARM::LDMIA_UPD:
3739 case ARM::LDMDA_UPD:
3740 case ARM::LDMDB_UPD:
3741 case ARM::LDMIB_UPD:
3742 case ARM::STMIA_UPD:
3743 case ARM::STMDA_UPD:
3744 case ARM::STMDB_UPD:
3745 case ARM::STMIB_UPD:
3746 case ARM::tLDMIA_UPD:
3747 case ARM::tSTMIA_UPD:
3748 case ARM::t2LDMIA_UPD:
3749 case ARM::t2LDMDB_UPD:
3750 case ARM::t2STMIA_UPD:
3751 case ARM::t2STMDB_UPD:
3752 ++UOps; // One for base register writeback.
3753 break;
3754 case ARM::LDMIA_RET:
3755 case ARM::tPOP_RET:
3756 case ARM::t2LDMIA_RET:
3757 UOps += 2; // One for base reg wb, one for write to pc.
3758 break;
3759 }
3760 return UOps;
3761}
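// Worked example of the counts above: an LDMIA of four registers is
// 1 (address) + 4 = 5 micro-ops, LDMIA_UPD adds one more for the base-register
// writeback (6), and LDMIA_RET adds two, for the writeback and the write to
// pc (7).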
3762
3763unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3764 const MachineInstr &MI) const {
3765 if (!ItinData || ItinData->isEmpty())
3766 return 1;
3767
3768 const MCInstrDesc &Desc = MI.getDesc();
3769 unsigned Class = Desc.getSchedClass();
3770 int ItinUOps = ItinData->getNumMicroOps(Class);
3771 if (ItinUOps >= 0) {
3772 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3773 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3774
3775 return ItinUOps;
3776 }
3777
3778 unsigned Opc = MI.getOpcode();
3779 switch (Opc) {
3780 default:
3781 llvm_unreachable("Unexpected multi-uops instruction!");
3782 case ARM::VLDMQIA:
3783 case ARM::VSTMQIA:
3784 return 2;
3785
3786 // The number of uOps for a load / store multiple is determined by the number
3787 // of registers.
3788 //
3789 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3790 // same cycle. The scheduling for the first load / store must be done
3791 // separately by assuming the address is not 64-bit aligned.
3792 //
3793 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3794 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3795 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
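// Worked example of the formulas above: a 5-register LDM on a Cortex-A9 class
// core takes (5 / 2) + (5 % 2) = 3 micro-ops, and the corresponding VFP / NEON
// load / store multiple takes (5 / 2) + (5 % 2) + 1 = 4.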
3796 case ARM::VLDMDIA:
3797 case ARM::VLDMDIA_UPD:
3798 case ARM::VLDMDDB_UPD:
3799 case ARM::VLDMSIA:
3800 case ARM::VLDMSIA_UPD:
3801 case ARM::VLDMSDB_UPD:
3802 case ARM::VSTMDIA:
3803 case ARM::VSTMDIA_UPD:
3804 case ARM::VSTMDDB_UPD:
3805 case ARM::VSTMSIA:
3806 case ARM::VSTMSIA_UPD:
3807 case ARM::VSTMSDB_UPD: {
3808 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3809 return (NumRegs / 2) + (NumRegs % 2) + 1;
3810 }
3811
3812 case ARM::LDMIA_RET:
3813 case ARM::LDMIA:
3814 case ARM::LDMDA:
3815 case ARM::LDMDB:
3816 case ARM::LDMIB:
3817 case ARM::LDMIA_UPD:
3818 case ARM::LDMDA_UPD:
3819 case ARM::LDMDB_UPD:
3820 case ARM::LDMIB_UPD:
3821 case ARM::STMIA:
3822 case ARM::STMDA:
3823 case ARM::STMDB:
3824 case ARM::STMIB:
3825 case ARM::STMIA_UPD:
3826 case ARM::STMDA_UPD:
3827 case ARM::STMDB_UPD:
3828 case ARM::STMIB_UPD:
3829 case ARM::tLDMIA:
3830 case ARM::tLDMIA_UPD:
3831 case ARM::tSTMIA_UPD:
3832 case ARM::tPOP_RET:
3833 case ARM::tPOP:
3834 case ARM::tPUSH:
3835 case ARM::t2LDMIA_RET:
3836 case ARM::t2LDMIA:
3837 case ARM::t2LDMDB:
3838 case ARM::t2LDMIA_UPD:
3839 case ARM::t2LDMDB_UPD:
3840 case ARM::t2STMIA:
3841 case ARM::t2STMDB:
3842 case ARM::t2STMIA_UPD:
3843 case ARM::t2STMDB_UPD: {
3844 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3845 switch (Subtarget.getLdStMultipleTiming()) {
3846 case ARMSubtarget::SingleIssuePlusExtras:
3847 return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3848 case ARMSubtarget::SingleIssue:
3849 // Assume the worst.
3850 return NumRegs;
3851 case ARMSubtarget::DoubleIssue: {
3852 if (NumRegs < 4)
3853 return 2;
3854 // 4 registers would be issued: 2, 2.
3855 // 5 registers would be issued: 2, 2, 1.
3856 unsigned UOps = (NumRegs / 2);
3857 if (NumRegs % 2)
3858 ++UOps;
3859 return UOps;
3860 }
3861 case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3862 unsigned UOps = (NumRegs / 2);
3863 // If there is an odd number of registers or the address is not 64-bit
3864 // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
3865 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3866 (*MI.memoperands_begin())->getAlign() < Align(8))
3867 ++UOps;
3868 return UOps;
3869 }
3870 }
3871 }
3872 }
3873 llvm_unreachable("Didn't find the number of microops");
3874}
3875
3876std::optional<unsigned>
3877ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3878 const MCInstrDesc &DefMCID, unsigned DefClass,
3879 unsigned DefIdx, unsigned DefAlign) const {
3880 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3881 if (RegNo <= 0)
3882 // Def is the address writeback.
3883 return ItinData->getOperandCycle(DefClass, DefIdx);
3884
3885 unsigned DefCycle;
3886 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3887 // (regno / 2) + (regno % 2) + 1
3888 DefCycle = RegNo / 2 + 1;
3889 if (RegNo % 2)
3890 ++DefCycle;
3891 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3892 DefCycle = RegNo;
3893 bool isSLoad = false;
3894
3895 switch (DefMCID.getOpcode()) {
3896 default: break;
3897 case ARM::VLDMSIA:
3898 case ARM::VLDMSIA_UPD:
3899 case ARM::VLDMSDB_UPD:
3900 isSLoad = true;
3901 break;
3902 }
3903
3904 // If there is an odd number of 'S' registers or the address is not 64-bit
3905 // aligned, then it takes an extra cycle.
3906 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3907 ++DefCycle;
3908 } else {
3909 // Assume the worst.
3910 DefCycle = RegNo + 2;
3911 }
3912
3913 return DefCycle;
3914}
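// Worked example of the paths above: for the third register in a VLDM list
// (RegNo == 3), the Cortex-A8 / A7 path gives 3 / 2 + 1 = 2 plus one more for
// the odd register, i.e. 3 cycles; the A9-like / Swift path starts from
// RegNo == 3 and adds one cycle only when this is an 'S'-register load with an
// odd RegNo, or the address is not 64-bit aligned.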
3915
3916std::optional<unsigned>
3917ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3918 const MCInstrDesc &DefMCID, unsigned DefClass,
3919 unsigned DefIdx, unsigned DefAlign) const {
3920 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3921 if (RegNo <= 0)
3922 // Def is the address writeback.
3923 return ItinData->getOperandCycle(DefClass, DefIdx);
3924
3925 unsigned DefCycle;
3926 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3927 // 4 registers would be issued: 1, 2, 1.
3928 // 5 registers would be issued: 1, 2, 2.
3929 DefCycle = RegNo / 2;
3930 if (DefCycle < 1)
3931 DefCycle = 1;
3932 // Result latency is issue cycle + 2: E2.
3933 DefCycle += 2;
3934 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3935 DefCycle = (RegNo / 2);
3936 // If there is an odd number of registers or the address is not 64-bit
3937 // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
3938 if ((RegNo % 2) || DefAlign < 8)
3939 ++DefCycle;
3940 // Result latency is AGU cycles + 2.
3941 DefCycle += 2;
3942 } else {
3943 // Assume the worst.
3944 DefCycle = RegNo + 2;
3945 }
3946
3947 return DefCycle;
3948}
3949
3950std::optional<unsigned>
3951ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3952 const MCInstrDesc &UseMCID, unsigned UseClass,
3953 unsigned UseIdx, unsigned UseAlign) const {
3954 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3955 if (RegNo <= 0)
3956 return ItinData->getOperandCycle(UseClass, UseIdx);
3957
3958 unsigned UseCycle;
3959 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3960 // (regno / 2) + (regno % 2) + 1
3961 UseCycle = RegNo / 2 + 1;
3962 if (RegNo % 2)
3963 ++UseCycle;
3964 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3965 UseCycle = RegNo;
3966 bool isSStore = false;
3967
3968 switch (UseMCID.getOpcode()) {
3969 default: break;
3970 case ARM::VSTMSIA:
3971 case ARM::VSTMSIA_UPD:
3972 case ARM::VSTMSDB_UPD:
3973 isSStore = true;
3974 break;
3975 }
3976
3977 // If there is an odd number of 'S' registers or the address is not 64-bit
3978 // aligned, then it takes an extra cycle.
3979 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3980 ++UseCycle;
3981 } else {
3982 // Assume the worst.
3983 UseCycle = RegNo + 2;
3984 }
3985
3986 return UseCycle;
3987}
3988
3989std::optional<unsigned>
3990ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3991 const MCInstrDesc &UseMCID, unsigned UseClass,
3992 unsigned UseIdx, unsigned UseAlign) const {
3993 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3994 if (RegNo <= 0)
3995 return ItinData->getOperandCycle(UseClass, UseIdx);
3996
3997 unsigned UseCycle;
3998 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3999 UseCycle = RegNo / 2;
4000 if (UseCycle < 2)
4001 UseCycle = 2;
4002 // Read in E3.
4003 UseCycle += 2;
4004 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
4005 UseCycle = (RegNo / 2);
4006 // If there is an odd number of registers or the address is not 64-bit
4007 // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
4008 if ((RegNo % 2) || UseAlign < 8)
4009 ++UseCycle;
4010 } else {
4011 // Assume the worst.
4012 UseCycle = 1;
4013 }
4014 return UseCycle;
4015}
4016
4017std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4018 const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
4019 unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
4020 unsigned UseIdx, unsigned UseAlign) const {
4021 unsigned DefClass = DefMCID.getSchedClass();
4022 unsigned UseClass = UseMCID.getSchedClass();
4023
4024 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
4025 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
4026
4027 // This may be a def / use of a variable_ops instruction, in which case the
4028 // operand latency might be determinable dynamically. Let the target try to
4029 // figure it out.
4030 std::optional<unsigned> DefCycle;
4031 bool LdmBypass = false;
4032 switch (DefMCID.getOpcode()) {
4033 default:
4034 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4035 break;
4036
4037 case ARM::VLDMDIA:
4038 case ARM::VLDMDIA_UPD:
4039 case ARM::VLDMDDB_UPD:
4040 case ARM::VLDMSIA:
4041 case ARM::VLDMSIA_UPD:
4042 case ARM::VLDMSDB_UPD:
4043 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4044 break;
4045
4046 case ARM::LDMIA_RET:
4047 case ARM::LDMIA:
4048 case ARM::LDMDA:
4049 case ARM::LDMDB:
4050 case ARM::LDMIB:
4051 case ARM::LDMIA_UPD:
4052 case ARM::LDMDA_UPD:
4053 case ARM::LDMDB_UPD:
4054 case ARM::LDMIB_UPD:
4055 case ARM::tLDMIA:
4056 case ARM::tLDMIA_UPD:
4057 case ARM::tPUSH:
4058 case ARM::t2LDMIA_RET:
4059 case ARM::t2LDMIA:
4060 case ARM::t2LDMDB:
4061 case ARM::t2LDMIA_UPD:
4062 case ARM::t2LDMDB_UPD:
4063 LdmBypass = true;
4064 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4065 break;
4066 }
4067
4068 if (!DefCycle)
4069 // We can't seem to determine the result latency of the def, assume it's 2.
4070 DefCycle = 2;
4071
4072 std::optional<unsigned> UseCycle;
4073 switch (UseMCID.getOpcode()) {
4074 default:
4075 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
4076 break;
4077
4078 case ARM::VSTMDIA:
4079 case ARM::VSTMDIA_UPD:
4080 case ARM::VSTMDDB_UPD:
4081 case ARM::VSTMSIA:
4082 case ARM::VSTMSIA_UPD:
4083 case ARM::VSTMSDB_UPD:
4084 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4085 break;
4086
4087 case ARM::STMIA:
4088 case ARM::STMDA:
4089 case ARM::STMDB:
4090 case ARM::STMIB:
4091 case ARM::STMIA_UPD:
4092 case ARM::STMDA_UPD:
4093 case ARM::STMDB_UPD:
4094 case ARM::STMIB_UPD:
4095 case ARM::tSTMIA_UPD:
4096 case ARM::tPOP_RET:
4097 case ARM::tPOP:
4098 case ARM::t2STMIA:
4099 case ARM::t2STMDB:
4100 case ARM::t2STMIA_UPD:
4101 case ARM::t2STMDB_UPD:
4102 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4103 break;
4104 }
4105
4106 if (!UseCycle)
4107 // Assume it's read in the first stage.
4108 UseCycle = 1;
4109
4110 if (UseCycle > *DefCycle + 1)
4111 return std::nullopt;
4112
4113 UseCycle = *DefCycle - *UseCycle + 1;
4114 if (UseCycle > 0u) {
4115 if (LdmBypass) {
4116 // It's a variable_ops instruction so we can't use DefIdx here. Just use
4117 // first def operand.
4118 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
4119 UseClass, UseIdx))
4120 UseCycle = *UseCycle - 1;
4121 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
4122 UseClass, UseIdx)) {
4123 UseCycle = *UseCycle - 1;
4124 }
4125 }
4126
4127 return UseCycle;
4128}
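// Worked example of the combination above: if the def's result is ready at
// cycle 4 and the use reads its operand at cycle 2, the operand latency is
// 4 - 2 + 1 = 3 cycles; if the itinerary records pipeline forwarding between
// the two scheduling classes, one further cycle is subtracted, giving 2.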
4129
4130static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
4131 const MachineInstr *MI, unsigned Reg,
4132 unsigned &DefIdx, unsigned &Dist) {
4133 Dist = 0;
4134
4135 MachineBasicBlock::const_iterator I = MI; ++I;
4136 MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
4137 assert(II->isInsideBundle() && "Empty bundle?");
4138
4139 int Idx = -1;
4140 while (II->isInsideBundle()) {
4141 Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
4142 if (Idx != -1)
4143 break;
4144 --II;
4145 ++Dist;
4146 }
4147
4148 assert(Idx != -1 && "Cannot find bundled definition!");
4149 DefIdx = Idx;
4150 return &*II;
4151}
4152
4153static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4154 const MachineInstr &MI, unsigned Reg,
4155 unsigned &UseIdx, unsigned &Dist) {
4156 Dist = 0;
4157
4158 MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
4159 assert(II->isInsideBundle() && "Empty bundle?");
4160 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4161
4162 // FIXME: This doesn't properly handle multiple uses.
4163 int Idx = -1;
4164 while (II != E && II->isInsideBundle()) {
4165 Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
4166 if (Idx != -1)
4167 break;
4168 if (II->getOpcode() != ARM::t2IT)
4169 ++Dist;
4170 ++II;
4171 }
4172
4173 if (Idx == -1) {
4174 Dist = 0;
4175 return nullptr;
4176 }
4177
4178 UseIdx = Idx;
4179 return &*II;
4180}
4181
4182/// Return the number of cycles to add to (or subtract from) the static
4183/// itinerary based on the def opcode and alignment. The caller will ensure that
4184/// the adjusted latency is at least one cycle.
4185static int adjustDefLatency(const ARMSubtarget &Subtarget,
4186 const MachineInstr &DefMI,
4187 const MCInstrDesc &DefMCID, unsigned DefAlign) {
4188 int Adjust = 0;
4189 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4190 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4191 // variants are one cycle cheaper.
4192 switch (DefMCID.getOpcode()) {
4193 default: break;
4194 case ARM::LDRrs:
4195 case ARM::LDRBrs: {
4196 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4197 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4198 if (ShImm == 0 ||
4199 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4200 --Adjust;
4201 break;
4202 }
4203 case ARM::t2LDRs:
4204 case ARM::t2LDRBs:
4205 case ARM::t2LDRHs:
4206 case ARM::t2LDRSHs: {
4207 // Thumb2 mode: lsl only.
4208 unsigned ShAmt = DefMI.getOperand(3).getImm();
4209 if (ShAmt == 0 || ShAmt == 2)
4210 --Adjust;
4211 break;
4212 }
4213 }
4214 } else if (Subtarget.isSwift()) {
4215 // FIXME: Properly handle all of the latency adjustments for address
4216 // writeback.
4217 switch (DefMCID.getOpcode()) {
4218 default: break;
4219 case ARM::LDRrs:
4220 case ARM::LDRBrs: {
4221 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4222 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4223 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4224 if (!isSub &&
4225 (ShImm == 0 ||
4226 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4227 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4228 Adjust -= 2;
4229 else if (!isSub &&
4230 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4231 --Adjust;
4232 break;
4233 }
4234 case ARM::t2LDRs:
4235 case ARM::t2LDRBs:
4236 case ARM::t2LDRHs:
4237 case ARM::t2LDRSHs: {
4238 // Thumb2 mode: lsl only.
4239 unsigned ShAmt = DefMI.getOperand(3).getImm();
4240 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4241 Adjust -= 2;
4242 break;
4243 }
4244 }
4245 }
4246
4247 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4248 switch (DefMCID.getOpcode()) {
4249 default: break;
4250 case ARM::VLD1q8:
4251 case ARM::VLD1q16:
4252 case ARM::VLD1q32:
4253 case ARM::VLD1q64:
4254 case ARM::VLD1q8wb_fixed:
4255 case ARM::VLD1q16wb_fixed:
4256 case ARM::VLD1q32wb_fixed:
4257 case ARM::VLD1q64wb_fixed:
4258 case ARM::VLD1q8wb_register:
4259 case ARM::VLD1q16wb_register:
4260 case ARM::VLD1q32wb_register:
4261 case ARM::VLD1q64wb_register:
4262 case ARM::VLD2d8:
4263 case ARM::VLD2d16:
4264 case ARM::VLD2d32:
4265 case ARM::VLD2q8:
4266 case ARM::VLD2q16:
4267 case ARM::VLD2q32:
4268 case ARM::VLD2d8wb_fixed:
4269 case ARM::VLD2d16wb_fixed:
4270 case ARM::VLD2d32wb_fixed:
4271 case ARM::VLD2q8wb_fixed:
4272 case ARM::VLD2q16wb_fixed:
4273 case ARM::VLD2q32wb_fixed:
4274 case ARM::VLD2d8wb_register:
4275 case ARM::VLD2d16wb_register:
4276 case ARM::VLD2d32wb_register:
4277 case ARM::VLD2q8wb_register:
4278 case ARM::VLD2q16wb_register:
4279 case ARM::VLD2q32wb_register:
4280 case ARM::VLD3d8:
4281 case ARM::VLD3d16:
4282 case ARM::VLD3d32:
4283 case ARM::VLD1d64T:
4284 case ARM::VLD3d8_UPD:
4285 case ARM::VLD3d16_UPD:
4286 case ARM::VLD3d32_UPD:
4287 case ARM::VLD1d64Twb_fixed:
4288 case ARM::VLD1d64Twb_register:
4289 case ARM::VLD3q8_UPD:
4290 case ARM::VLD3q16_UPD:
4291 case ARM::VLD3q32_UPD:
4292 case ARM::VLD4d8:
4293 case ARM::VLD4d16:
4294 case ARM::VLD4d32:
4295 case ARM::VLD1d64Q:
4296 case ARM::VLD4d8_UPD:
4297 case ARM::VLD4d16_UPD:
4298 case ARM::VLD4d32_UPD:
4299 case ARM::VLD1d64Qwb_fixed:
4300 case ARM::VLD1d64Qwb_register:
4301 case ARM::VLD4q8_UPD:
4302 case ARM::VLD4q16_UPD:
4303 case ARM::VLD4q32_UPD:
4304 case ARM::VLD1DUPq8:
4305 case ARM::VLD1DUPq16:
4306 case ARM::VLD1DUPq32:
4307 case ARM::VLD1DUPq8wb_fixed:
4308 case ARM::VLD1DUPq16wb_fixed:
4309 case ARM::VLD1DUPq32wb_fixed:
4310 case ARM::VLD1DUPq8wb_register:
4311 case ARM::VLD1DUPq16wb_register:
4312 case ARM::VLD1DUPq32wb_register:
4313 case ARM::VLD2DUPd8:
4314 case ARM::VLD2DUPd16:
4315 case ARM::VLD2DUPd32:
4316 case ARM::VLD2DUPd8wb_fixed:
4317 case ARM::VLD2DUPd16wb_fixed:
4318 case ARM::VLD2DUPd32wb_fixed:
4319 case ARM::VLD2DUPd8wb_register:
4320 case ARM::VLD2DUPd16wb_register:
4321 case ARM::VLD2DUPd32wb_register:
4322 case ARM::VLD4DUPd8:
4323 case ARM::VLD4DUPd16:
4324 case ARM::VLD4DUPd32:
4325 case ARM::VLD4DUPd8_UPD:
4326 case ARM::VLD4DUPd16_UPD:
4327 case ARM::VLD4DUPd32_UPD:
4328 case ARM::VLD1LNd8:
4329 case ARM::VLD1LNd16:
4330 case ARM::VLD1LNd32:
4331 case ARM::VLD1LNd8_UPD:
4332 case ARM::VLD1LNd16_UPD:
4333 case ARM::VLD1LNd32_UPD:
4334 case ARM::VLD2LNd8:
4335 case ARM::VLD2LNd16:
4336 case ARM::VLD2LNd32:
4337 case ARM::VLD2LNq16:
4338 case ARM::VLD2LNq32:
4339 case ARM::VLD2LNd8_UPD:
4340 case ARM::VLD2LNd16_UPD:
4341 case ARM::VLD2LNd32_UPD:
4342 case ARM::VLD2LNq16_UPD:
4343 case ARM::VLD2LNq32_UPD:
4344 case ARM::VLD4LNd8:
4345 case ARM::VLD4LNd16:
4346 case ARM::VLD4LNd32:
4347 case ARM::VLD4LNq16:
4348 case ARM::VLD4LNq32:
4349 case ARM::VLD4LNd8_UPD:
4350 case ARM::VLD4LNd16_UPD:
4351 case ARM::VLD4LNd32_UPD:
4352 case ARM::VLD4LNq16_UPD:
4353 case ARM::VLD4LNq32_UPD:
4354 // If the address is not 64-bit aligned, the latencies of these
4355 // instructions increase by one.
4356 ++Adjust;
4357 break;
4358 }
4359 }
4360 return Adjust;
4361}
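// For illustration: on Swift an LDRrs addressed as [Rn, Rm, lsl #2] gets
// Adjust -= 2, while on a Cortex-A8 / A9-like core the same load only gets
// Adjust -= 1; independently, a VLD2d8 from an address that is not 64-bit
// aligned gets ++Adjust. The callers apply a negative adjustment only when the
// itinerary latency is large enough to absorb it.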
4362
4363std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4364 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4365 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4366 // No operand latency. The caller may fall back to getInstrLatency.
4367 if (!ItinData || ItinData->isEmpty())
4368 return std::nullopt;
4369
4370 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4371 Register Reg = DefMO.getReg();
4372
4373 const MachineInstr *ResolvedDefMI = &DefMI;
4374 unsigned DefAdj = 0;
4375 if (DefMI.isBundle())
4376 ResolvedDefMI =
4377 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4378 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4379 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4380 return 1;
4381 }
4382
4383 const MachineInstr *ResolvedUseMI = &UseMI;
4384 unsigned UseAdj = 0;
4385 if (UseMI.isBundle()) {
4386 ResolvedUseMI =
4387 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4388 if (!ResolvedUseMI)
4389 return std::nullopt;
4390 }
4391
4392 return getOperandLatencyImpl(
4393 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4394 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4395}
4396
4397std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4398 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4399 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4400 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4401 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4402 if (Reg == ARM::CPSR) {
4403 if (DefMI.getOpcode() == ARM::FMSTAT) {
4404 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4405 return Subtarget.isLikeA9() ? 1 : 20;
4406 }
4407
4408 // CPSR set and branch can be paired in the same cycle.
4409 if (UseMI.isBranch())
4410 return 0;
4411
4412 // Otherwise it takes the instruction latency (generally one).
4413 unsigned Latency = getInstrLatency(ItinData, DefMI);
4414
4415 // For Thumb2 and -Os, prefer scheduling the CPSR-setting instruction close
4416 // to its uses. Instructions that are otherwise scheduled between them may
4417 // incur a code size penalty (they cannot use the CPSR-setting 16-bit
4418 // instructions).
4419 if (Latency > 0 && Subtarget.isThumb2()) {
4420 const MachineFunction *MF = DefMI.getParent()->getParent();
4421 // FIXME: Use Function::hasOptSize().
4422 if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4423 --Latency;
4424 }
4425 return Latency;
4426 }
4427
4428 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4429 return std::nullopt;
4430
4431 unsigned DefAlign = DefMI.hasOneMemOperand()
4432 ? (*DefMI.memoperands_begin())->getAlign().value()
4433 : 0;
4434 unsigned UseAlign = UseMI.hasOneMemOperand()
4435 ? (*UseMI.memoperands_begin())->getAlign().value()
4436 : 0;
4437
4438 // Get the itinerary's latency if possible, and handle variable_ops.
4439 std::optional<unsigned> Latency = getOperandLatency(
4440 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4441 // Unable to find operand latency. The caller may resort to getInstrLatency.
4442 if (!Latency)
4443 return std::nullopt;
4444
4445 // Adjust for IT block position.
4446 int Adj = DefAdj + UseAdj;
4447
4448 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4449 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4450 if (Adj >= 0 || (int)*Latency > -Adj) {
4451 return *Latency + Adj;
4452 }
4453 // Return the itinerary latency, which may be zero but not less than zero.
4454 return Latency;
4455}
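// For illustration of the CPSR special cases above: an FMSTAT def of CPSR is
// modelled as 20 cycles unless the core is A9-like, where it is 1; a CPSR def
// feeding a branch is free (latency 0); and in Thumb2 functions built for size
// the latency is reduced by one to keep CPSR-setting instructions next to
// their uses.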
4456
4457std::optional<unsigned>
4458ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4459 SDNode *DefNode, unsigned DefIdx,
4460 SDNode *UseNode, unsigned UseIdx) const {
4461 if (!DefNode->isMachineOpcode())
4462 return 1;
4463
4464 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4465
4466 if (isZeroCost(DefMCID.Opcode))
4467 return 0;
4468
4469 if (!ItinData || ItinData->isEmpty())
4470 return DefMCID.mayLoad() ? 3 : 1;
4471
4472 if (!UseNode->isMachineOpcode()) {
4473 std::optional<unsigned> Latency =
4474 ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4475 int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4476 int Threshold = 1 + Adj;
4477 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4478 }
4479
4480 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4481 auto *DefMN = cast<MachineSDNode>(DefNode);
4482 unsigned DefAlign = !DefMN->memoperands_empty()
4483 ? (*DefMN->memoperands_begin())->getAlign().value()
4484 : 0;
4485 auto *UseMN = cast<MachineSDNode>(UseNode);
4486 unsigned UseAlign = !UseMN->memoperands_empty()
4487 ? (*UseMN->memoperands_begin())->getAlign().value()
4488 : 0;
4489 std::optional<unsigned> Latency = getOperandLatency(
4490 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4491 if (!Latency)
4492 return std::nullopt;
4493
4494 if (Latency > 1U &&
4495 (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4496 Subtarget.isCortexA7())) {
4497 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4498 // variants are one cycle cheaper.
4499 switch (DefMCID.getOpcode()) {
4500 default: break;
4501 case ARM::LDRrs:
4502 case ARM::LDRBrs: {
4503 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4504 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4505 if (ShImm == 0 ||
4506 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4507 Latency = *Latency - 1;
4508 break;
4509 }
4510 case ARM::t2LDRs:
4511 case ARM::t2LDRBs:
4512 case ARM::t2LDRHs:
4513 case ARM::t2LDRSHs: {
4514 // Thumb2 mode: lsl only.
4515 unsigned ShAmt = DefNode->getConstantOperandVal(2);
4516 if (ShAmt == 0 || ShAmt == 2)
4517 Latency = *Latency - 1;
4518 break;
4519 }
4520 }
4521 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4522 // FIXME: Properly handle all of the latency adjustments for address
4523 // writeback.
4524 switch (DefMCID.getOpcode()) {
4525 default: break;
4526 case ARM::LDRrs:
4527 case ARM::LDRBrs: {
4528 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4529 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4530 if (ShImm == 0 ||
4531 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4532 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4533 Latency = *Latency - 2;
4534 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4535 Latency = *Latency - 1;
4536 break;
4537 }
4538 case ARM::t2LDRs:
4539 case ARM::t2LDRBs:
4540 case ARM::t2LDRHs:
4541 case ARM::t2LDRSHs:
4542 // Thumb2 mode: lsl 0-3 only.
4543 Latency = *Latency - 2;
4544 break;
4545 }
4546 }
4547
4548 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4549 switch (DefMCID.getOpcode()) {
4550 default: break;
4551 case ARM::VLD1q8:
4552 case ARM::VLD1q16:
4553 case ARM::VLD1q32:
4554 case ARM::VLD1q64:
4555 case ARM::VLD1q8wb_register:
4556 case ARM::VLD1q16wb_register:
4557 case ARM::VLD1q32wb_register:
4558 case ARM::VLD1q64wb_register:
4559 case ARM::VLD1q8wb_fixed:
4560 case ARM::VLD1q16wb_fixed:
4561 case ARM::VLD1q32wb_fixed:
4562 case ARM::VLD1q64wb_fixed:
4563 case ARM::VLD2d8:
4564 case ARM::VLD2d16:
4565 case ARM::VLD2d32:
4566 case ARM::VLD2q8Pseudo:
4567 case ARM::VLD2q16Pseudo:
4568 case ARM::VLD2q32Pseudo:
4569 case ARM::VLD2d8wb_fixed:
4570 case ARM::VLD2d16wb_fixed:
4571 case ARM::VLD2d32wb_fixed:
4572 case ARM::VLD2q8PseudoWB_fixed:
4573 case ARM::VLD2q16PseudoWB_fixed:
4574 case ARM::VLD2q32PseudoWB_fixed:
4575 case ARM::VLD2d8wb_register:
4576 case ARM::VLD2d16wb_register:
4577 case ARM::VLD2d32wb_register:
4578 case ARM::VLD2q8PseudoWB_register:
4579 case ARM::VLD2q16PseudoWB_register:
4580 case ARM::VLD2q32PseudoWB_register:
4581 case ARM::VLD3d8Pseudo:
4582 case ARM::VLD3d16Pseudo:
4583 case ARM::VLD3d32Pseudo:
4584 case ARM::VLD1d8TPseudo:
4585 case ARM::VLD1d16TPseudo:
4586 case ARM::VLD1d32TPseudo:
4587 case ARM::VLD1d64TPseudo:
4588 case ARM::VLD1d64TPseudoWB_fixed:
4589 case ARM::VLD1d64TPseudoWB_register:
4590 case ARM::VLD3d8Pseudo_UPD:
4591 case ARM::VLD3d16Pseudo_UPD:
4592 case ARM::VLD3d32Pseudo_UPD:
4593 case ARM::VLD3q8Pseudo_UPD:
4594 case ARM::VLD3q16Pseudo_UPD:
4595 case ARM::VLD3q32Pseudo_UPD:
4596 case ARM::VLD3q8oddPseudo:
4597 case ARM::VLD3q16oddPseudo:
4598 case ARM::VLD3q32oddPseudo:
4599 case ARM::VLD3q8oddPseudo_UPD:
4600 case ARM::VLD3q16oddPseudo_UPD:
4601 case ARM::VLD3q32oddPseudo_UPD:
4602 case ARM::VLD4d8Pseudo:
4603 case ARM::VLD4d16Pseudo:
4604 case ARM::VLD4d32Pseudo:
4605 case ARM::VLD1d8QPseudo:
4606 case ARM::VLD1d16QPseudo:
4607 case ARM::VLD1d32QPseudo:
4608 case ARM::VLD1d64QPseudo:
4609 case ARM::VLD1d64QPseudoWB_fixed:
4610 case ARM::VLD1d64QPseudoWB_register:
4611 case ARM::VLD1q8HighQPseudo:
4612 case ARM::VLD1q8LowQPseudo_UPD:
4613 case ARM::VLD1q8HighTPseudo:
4614 case ARM::VLD1q8LowTPseudo_UPD:
4615 case ARM::VLD1q16HighQPseudo:
4616 case ARM::VLD1q16LowQPseudo_UPD:
4617 case ARM::VLD1q16HighTPseudo:
4618 case ARM::VLD1q16LowTPseudo_UPD:
4619 case ARM::VLD1q32HighQPseudo:
4620 case ARM::VLD1q32LowQPseudo_UPD:
4621 case ARM::VLD1q32HighTPseudo:
4622 case ARM::VLD1q32LowTPseudo_UPD:
4623 case ARM::VLD1q64HighQPseudo:
4624 case ARM::VLD1q64LowQPseudo_UPD:
4625 case ARM::VLD1q64HighTPseudo:
4626 case ARM::VLD1q64LowTPseudo_UPD:
4627 case ARM::VLD4d8Pseudo_UPD:
4628 case ARM::VLD4d16Pseudo_UPD:
4629 case ARM::VLD4d32Pseudo_UPD:
4630 case ARM::VLD4q8Pseudo_UPD:
4631 case ARM::VLD4q16Pseudo_UPD:
4632 case ARM::VLD4q32Pseudo_UPD:
4633 case ARM::VLD4q8oddPseudo:
4634 case ARM::VLD4q16oddPseudo:
4635 case ARM::VLD4q32oddPseudo:
4636 case ARM::VLD4q8oddPseudo_UPD:
4637 case ARM::VLD4q16oddPseudo_UPD:
4638 case ARM::VLD4q32oddPseudo_UPD:
4639 case ARM::VLD1DUPq8:
4640 case ARM::VLD1DUPq16:
4641 case ARM::VLD1DUPq32:
4642 case ARM::VLD1DUPq8wb_fixed:
4643 case ARM::VLD1DUPq16wb_fixed:
4644 case ARM::VLD1DUPq32wb_fixed:
4645 case ARM::VLD1DUPq8wb_register:
4646 case ARM::VLD1DUPq16wb_register:
4647 case ARM::VLD1DUPq32wb_register:
4648 case ARM::VLD2DUPd8:
4649 case ARM::VLD2DUPd16:
4650 case ARM::VLD2DUPd32:
4651 case ARM::VLD2DUPd8wb_fixed:
4652 case ARM::VLD2DUPd16wb_fixed:
4653 case ARM::VLD2DUPd32wb_fixed:
4654 case ARM::VLD2DUPd8wb_register:
4655 case ARM::VLD2DUPd16wb_register:
4656 case ARM::VLD2DUPd32wb_register:
4657 case ARM::VLD2DUPq8EvenPseudo:
4658 case ARM::VLD2DUPq8OddPseudo:
4659 case ARM::VLD2DUPq16EvenPseudo:
4660 case ARM::VLD2DUPq16OddPseudo:
4661 case ARM::VLD2DUPq32EvenPseudo:
4662 case ARM::VLD2DUPq32OddPseudo:
4663 case ARM::VLD3DUPq8EvenPseudo:
4664 case ARM::VLD3DUPq8OddPseudo:
4665 case ARM::VLD3DUPq16EvenPseudo:
4666 case ARM::VLD3DUPq16OddPseudo:
4667 case ARM::VLD3DUPq32EvenPseudo:
4668 case ARM::VLD3DUPq32OddPseudo:
4669 case ARM::VLD4DUPd8Pseudo:
4670 case ARM::VLD4DUPd16Pseudo:
4671 case ARM::VLD4DUPd32Pseudo:
4672 case ARM::VLD4DUPd8Pseudo_UPD:
4673 case ARM::VLD4DUPd16Pseudo_UPD:
4674 case ARM::VLD4DUPd32Pseudo_UPD:
4675 case ARM::VLD4DUPq8EvenPseudo:
4676 case ARM::VLD4DUPq8OddPseudo:
4677 case ARM::VLD4DUPq16EvenPseudo:
4678 case ARM::VLD4DUPq16OddPseudo:
4679 case ARM::VLD4DUPq32EvenPseudo:
4680 case ARM::VLD4DUPq32OddPseudo:
4681 case ARM::VLD1LNq8Pseudo:
4682 case ARM::VLD1LNq16Pseudo:
4683 case ARM::VLD1LNq32Pseudo:
4684 case ARM::VLD1LNq8Pseudo_UPD:
4685 case ARM::VLD1LNq16Pseudo_UPD:
4686 case ARM::VLD1LNq32Pseudo_UPD:
4687 case ARM::VLD2LNd8Pseudo:
4688 case ARM::VLD2LNd16Pseudo:
4689 case ARM::VLD2LNd32Pseudo:
4690 case ARM::VLD2LNq16Pseudo:
4691 case ARM::VLD2LNq32Pseudo:
4692 case ARM::VLD2LNd8Pseudo_UPD:
4693 case ARM::VLD2LNd16Pseudo_UPD:
4694 case ARM::VLD2LNd32Pseudo_UPD:
4695 case ARM::VLD2LNq16Pseudo_UPD:
4696 case ARM::VLD2LNq32Pseudo_UPD:
4697 case ARM::VLD4LNd8Pseudo:
4698 case ARM::VLD4LNd16Pseudo:
4699 case ARM::VLD4LNd32Pseudo:
4700 case ARM::VLD4LNq16Pseudo:
4701 case ARM::VLD4LNq32Pseudo:
4702 case ARM::VLD4LNd8Pseudo_UPD:
4703 case ARM::VLD4LNd16Pseudo_UPD:
4704 case ARM::VLD4LNd32Pseudo_UPD:
4705 case ARM::VLD4LNq16Pseudo_UPD:
4706 case ARM::VLD4LNq32Pseudo_UPD:
4707 // If the address is not 64-bit aligned, the latencies of these
4708 // instructions increase by one.
4709 Latency = *Latency + 1;
4710 break;
4711 }
4712
4713 return Latency;
4714}
4715
4716unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4717 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4718 MI.isImplicitDef())
4719 return 0;
4720
4721 if (MI.isBundle())
4722 return 0;
4723
4724 const MCInstrDesc &MCID = MI.getDesc();
4725
4726 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4727 !Subtarget.cheapPredicableCPSRDef())) {
4728 // When predicated, CPSR is an additional source operand for CPSR-updating
4729 // instructions; this apparently increases their latencies.
4730 return 1;
4731 }
4732 return 0;
4733}
4734
4735unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4736 const MachineInstr &MI,
4737 unsigned *PredCost) const {
4738 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4739 MI.isImplicitDef())
4740 return 1;
4741
4742 // An instruction scheduler typically runs on unbundled instructions; however,
4743 // other passes may query the latency of a bundled instruction.
4744 if (MI.isBundle()) {
4745 unsigned Latency = 0;
4746 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4747 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4748 while (++I != E && I->isInsideBundle()) {
4749 if (I->getOpcode() != ARM::t2IT)
4750 Latency += getInstrLatency(ItinData, *I, PredCost);
4751 }
4752 return Latency;
4753 }
4754
4755 const MCInstrDesc &MCID = MI.getDesc();
4756 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4757 !Subtarget.cheapPredicableCPSRDef()))) {
4758 // When predicated, CPSR is an additional source operand for CPSR-updating
4759 // instructions; this apparently increases their latencies.
4760 *PredCost = 1;
4761 }
4762 // Be sure to call getStageLatency for an empty itinerary in case it has a
4763 // valid MinLatency property.
4764 if (!ItinData)
4765 return MI.mayLoad() ? 3 : 1;
4766
4767 unsigned Class = MCID.getSchedClass();
4768
4769 // For instructions with variable uops, use uops as latency.
4770 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4771 return getNumMicroOps(ItinData, MI);
4772
4773 // For the common case, fall back on the itinerary's latency.
4774 unsigned Latency = ItinData->getStageLatency(Class);
4775
4776 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4777 unsigned DefAlign =
4778 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4779 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4780 if (Adj >= 0 || (int)Latency > -Adj) {
4781 return Latency + Adj;
4782 }
4783 return Latency;
4784}
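// For illustration: a bundle's latency is the sum of its members' latencies
// with any t2IT skipped, and an instruction whose itinerary reports a variable
// number of micro-ops (such as a load / store multiple) uses its micro-op
// count as its latency.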
4785
4786unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4787 SDNode *Node) const {
4788 if (!Node->isMachineOpcode())
4789 return 1;
4790
4791 if (!ItinData || ItinData->isEmpty())
4792 return 1;
4793
4794 unsigned Opcode = Node->getMachineOpcode();
4795 switch (Opcode) {
4796 default:
4797 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4798 case ARM::VLDMQIA:
4799 case ARM::VSTMQIA:
4800 return 2;
4801 }
4802}
4803
4804bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4805 const MachineRegisterInfo *MRI,
4806 const MachineInstr &DefMI,
4807 unsigned DefIdx,
4808 const MachineInstr &UseMI,
4809 unsigned UseIdx) const {
4810 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4811 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4812 if (Subtarget.nonpipelinedVFP() &&
4813 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4814 return true;
4815
4816 // Hoist VFP / NEON instructions with 4 or higher latency.
4817 unsigned Latency =
4818 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4819 if (Latency <= 3)
4820 return false;
4821 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4822 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4823}
4824
4825bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4826 const MachineInstr &DefMI,
4827 unsigned DefIdx) const {
4828 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4829 if (!ItinData || ItinData->isEmpty())
4830 return false;
4831
4832 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4833 if (DDomain == ARMII::DomainGeneral) {
4834 unsigned DefClass = DefMI.getDesc().getSchedClass();
4835 std::optional<unsigned> DefCycle =
4836 ItinData->getOperandCycle(DefClass, DefIdx);
4837 return DefCycle && DefCycle <= 2U;
4838 }
4839 return false;
4840}
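// Illustrative use (hypothetical caller, e.g. a hoisting pass such as
// MachineLICM, with TII pointing at this ARMBaseInstrInfo):
//   if (TII->hasHighOperandLatency(SchedModel, MRI, DefMI, DefIdx, UseMI,
//                                  UseIdx) &&
//       !TII->hasLowDefLatency(SchedModel, DefMI, DefIdx))
//     ; // the definition is likely worth hoisting out of the loop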
4841
4842bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4843 StringRef &ErrInfo) const {
4844 if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4845 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4846 return false;
4847 }
4848 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4849 // Make sure we don't generate a lo-lo mov that isn't supported.
4850 if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4851 !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4852 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4853 return false;
4854 }
4855 }
4856 if (MI.getOpcode() == ARM::tPUSH ||
4857 MI.getOpcode() == ARM::tPOP ||
4858 MI.getOpcode() == ARM::tPOP_RET) {
4859 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4860 if (MO.isImplicit() || !MO.isReg())
4861 continue;
4862 Register Reg = MO.getReg();
4863 if (Reg < ARM::R0 || Reg > ARM::R7) {