ARMBaseInstrInfo.cpp
1//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the Base ARM implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMBaseInstrInfo.h"
14#include "ARMBaseRegisterInfo.h"
16#include "ARMFeatures.h"
17#include "ARMHazardRecognizer.h"
19#include "ARMSubtarget.h"
22#include "MVETailPredUtils.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallSet.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/Module.h"
53#include "llvm/MC/MCAsmInfo.h"
54#include "llvm/MC/MCInstrDesc.h"
59#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <iterator>
67#include <new>
68#include <utility>
69#include <vector>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "arm-instrinfo"
74
75#define GET_INSTRINFO_CTOR_DTOR
76#include "ARMGenInstrInfo.inc"
77
78/// ARM_MLxEntry - Record information about MLA / MLS instructions.
79struct ARM_MLxEntry {
80 uint16_t MLxOpc; // MLA / MLS opcode
81 uint16_t MulOpc; // Expanded multiplication opcode
82 uint16_t AddSubOpc; // Expanded add / sub opcode
83 bool NegAcc; // True if the acc is negated before the add / sub.
84 bool HasLane; // True if instruction has an extra "lane" operand.
85};
86
87static const ARM_MLxEntry ARM_MLxTable[] = {
88 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
89 // fp scalar ops
90 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
91 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
92 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
93 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
94 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
95 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
96 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
97 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
98
99 // fp SIMD ops
100 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
101 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
102 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
103 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
104 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
105 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
106 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
107 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
108};
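// The constructor below builds MLxEntryMap (MLx opcode -> index into this
// table) and MLxHazardOpcodes from these rows, so a fused MLA/MLS that must be
// broken up to avoid an FP MLx pipeline hazard can be rewritten as the listed
// multiply followed by the listed add/sub.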
109
112 : ARMGenInstrInfo(STI, TRI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
113 Subtarget(STI) {
114 for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
115 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
116 llvm_unreachable("Duplicated entries?");
117 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
118 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
119 }
120}
121
122// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
123// currently defaults to no prepass hazard recognizer.
124ScheduleHazardRecognizer *
125ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
126 const ScheduleDAG *DAG) const {
127 if (usePreRAHazardRecognizer()) {
128 const InstrItineraryData *II =
129 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
130 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
131 }
132 return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
133}
134
135// Called during:
136// - pre-RA scheduling
137// - post-RA scheduling when FeatureUseMISched is set
138ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
139 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
140 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
141
142 // We would like to restrict this hazard recognizer to only
143 // post-RA scheduling; we can tell that we're post-RA because we don't
144 // track VRegLiveness.
145 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
146 // banks banked on bit 2. Assume that TCMs are in use.
147 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
148 MHR->AddHazardRecognizer(
149 std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
150
151 // Not inserting ARMHazardRecognizerFPMLx because that would change
152 // legacy behavior
153
154 auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
155 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
156 return MHR;
157}
158
159// Called during post-RA scheduling when FeatureUseMISched is not set
160ScheduleHazardRecognizer *ARMBaseInstrInfo::
161CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
162 const ScheduleDAG *DAG) const {
163 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
164
165 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
166 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
167
168 auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
169 if (BHR)
170 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
171 return MHR;
172}
173
174// Branch analysis.
175// Cond vector output format:
176// 0 elements indicates an unconditional branch
177// 2 elements indicates a conditional branch; the elements are
178// the condition to check and the CPSR.
179// 3 elements indicates a hardware loop end; the elements
180// are the opcode, the operand value to test, and a dummy
181// operand used to pad out to 3 operands.
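// For example, a conditional branch "Bcc %bb.1, ARMCC::EQ, $cpsr" yields
// Cond = { ARMCC::EQ (imm), $cpsr }, and a hardware loop end yields
// Cond = { t2LoopEnd opcode (imm), loop counter operand, 0 (imm) }.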
182bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
183 MachineBasicBlock *&TBB,
184 MachineBasicBlock *&FBB,
185 SmallVectorImpl<MachineOperand> &Cond,
186 bool AllowModify) const {
187 TBB = nullptr;
188 FBB = nullptr;
189
190 MachineBasicBlock::instr_iterator I = MBB.instr_end();
191 if (I == MBB.instr_begin())
192 return false; // Empty blocks are easy.
193 --I;
194
195 // Walk backwards from the end of the basic block until the branch is
196 // analyzed or we give up.
197 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
198 // Flag to be raised on unanalyzeable instructions. This is useful in cases
199 // where we want to clean up on the end of the basic block before we bail
200 // out.
201 bool CantAnalyze = false;
202
203 // Skip over DEBUG values, predicated nonterminators and speculation
204 // barrier terminators.
205 while (I->isDebugInstr() || !I->isTerminator() ||
206 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
207 I->getOpcode() == ARM::t2DoLoopStartTP){
208 if (I == MBB.instr_begin())
209 return false;
210 --I;
211 }
212
213 if (isIndirectBranchOpcode(I->getOpcode()) ||
214 isJumpTableBranchOpcode(I->getOpcode())) {
215 // Indirect branches and jump tables can't be analyzed, but we still want
216 // to clean up any instructions at the tail of the basic block.
217 CantAnalyze = true;
218 } else if (isUncondBranchOpcode(I->getOpcode())) {
219 TBB = I->getOperand(0).getMBB();
220 } else if (isCondBranchOpcode(I->getOpcode())) {
221 // Bail out if we encounter multiple conditional branches.
222 if (!Cond.empty())
223 return true;
224
225 assert(!FBB && "FBB should have been null.");
226 FBB = TBB;
227 TBB = I->getOperand(0).getMBB();
228 Cond.push_back(I->getOperand(1));
229 Cond.push_back(I->getOperand(2));
230 } else if (I->isReturn()) {
231 // Returns can't be analyzed, but we should run cleanup.
232 CantAnalyze = true;
233 } else if (I->getOpcode() == ARM::t2LoopEnd &&
234 MBB.getParent()
235 ->getSubtarget<ARMSubtarget>()
236 .enableMachinePipeliner()) {
237 if (!Cond.empty())
238 return true;
239 FBB = TBB;
240 TBB = I->getOperand(1).getMBB();
241 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
242 Cond.push_back(I->getOperand(0));
243 Cond.push_back(MachineOperand::CreateImm(0));
244 } else {
245 // We encountered other unrecognized terminator. Bail out immediately.
246 return true;
247 }
248
249 // Cleanup code - to be run for unpredicated unconditional branches and
250 // returns.
251 if (!isPredicated(*I) &&
252 (isUncondBranchOpcode(I->getOpcode()) ||
253 isIndirectBranchOpcode(I->getOpcode()) ||
254 isJumpTableBranchOpcode(I->getOpcode()) ||
255 I->isReturn())) {
256 // Forget any previous condition branch information - it no longer applies.
257 Cond.clear();
258 FBB = nullptr;
259
260 // If we can modify the function, delete everything below this
261 // unconditional branch.
262 if (AllowModify) {
263 MachineBasicBlock::iterator DI = std::next(I);
264 while (DI != MBB.instr_end()) {
265 MachineInstr &InstToDelete = *DI;
266 ++DI;
267 // Speculation barriers must not be deleted.
268 if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
269 continue;
270 InstToDelete.eraseFromParent();
271 }
272 }
273 }
274
275 if (CantAnalyze) {
276 // We may not be able to analyze the block, but we could still have
277 // an unconditional branch as the last instruction in the block, which
278 // just branches to layout successor. If this is the case, then just
279 // remove it if we're allowed to make modifications.
280 if (AllowModify && !isPredicated(MBB.back()) &&
281 isUncondBranchOpcode(MBB.back().getOpcode()) &&
282 TBB && MBB.isLayoutSuccessor(TBB))
283 removeBranch(MBB);
284 return true;
285 }
286
287 if (I == MBB.instr_begin())
288 return false;
289
290 --I;
291 }
292
293 // We made it past the terminators without bailing out - we must have
294 // analyzed this branch successfully.
295 return false;
296}
297
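// removeBranch erases up to two terminators from the end of the block: the
// trailing branch, and, if one remains before it, the conditional branch (or
// t2LoopEnd) preceding it. It returns how many branches were removed.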
298unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
299 int *BytesRemoved) const {
300 assert(!BytesRemoved && "code size not handled");
301
302 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
303 if (I == MBB.end())
304 return 0;
305
306 if (!isUncondBranchOpcode(I->getOpcode()) &&
307 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
308 return 0;
309
310 // Remove the branch.
311 I->eraseFromParent();
312
313 I = MBB.end();
314
315 if (I == MBB.begin()) return 1;
316 --I;
317 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
318 return 1;
319
320 // Remove the branch.
321 I->eraseFromParent();
322 return 2;
323}
324
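// insertBranch emits a single branch when there is no explicit false block
// (returning 1), and a conditional branch to TBB followed by an unconditional
// branch to FBB otherwise (returning 2). Cond uses the encoding produced by
// analyzeBranch above.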
325unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
326 MachineBasicBlock *TBB,
327 MachineBasicBlock *FBB,
328 ArrayRef<MachineOperand> Cond,
329 const DebugLoc &DL,
330 int *BytesAdded) const {
331 assert(!BytesAdded && "code size not handled");
332 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
333 int BOpc = !AFI->isThumbFunction()
334 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
335 int BccOpc = !AFI->isThumbFunction()
336 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
337 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
338
339 // Shouldn't be a fall through.
340 assert(TBB && "insertBranch must not be told to insert a fallthrough");
341 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
342 "ARM branch conditions have two or three components!");
343
344 // For conditional branches, we use addOperand to preserve CPSR flags.
345
346 if (!FBB) {
347 if (Cond.empty()) { // Unconditional branch?
348 if (isThumb)
349 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
350 else
351 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
352 } else if (Cond.size() == 2) {
353 BuildMI(&MBB, DL, get(BccOpc))
354 .addMBB(TBB)
355 .addImm(Cond[0].getImm())
356 .add(Cond[1]);
357 } else
358 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
359 return 1;
360 }
361
362 // Two-way conditional branch.
363 if (Cond.size() == 2)
364 BuildMI(&MBB, DL, get(BccOpc))
365 .addMBB(TBB)
366 .addImm(Cond[0].getImm())
367 .add(Cond[1]);
368 else if (Cond.size() == 3)
369 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
370 if (isThumb)
371 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
372 else
373 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
374 return 2;
375}
376
377bool ARMBaseInstrInfo::
378reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
379 if (Cond.size() == 2) {
380 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
381 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
382 return false;
383 }
384 return true;
385}
386
387bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
388 if (MI.isBundle()) {
389 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
390 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
391 while (++I != E && I->isInsideBundle()) {
392 int PIdx = I->findFirstPredOperandIdx();
393 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
394 return true;
395 }
396 return false;
397 }
398
399 int PIdx = MI.findFirstPredOperandIdx();
400 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
401}
402
403std::string ARMBaseInstrInfo::createMIROperandComment(
404 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
405 const TargetRegisterInfo *TRI) const {
406
407 // First, let's see if there is a generic comment for this operand
408 std::string GenericComment =
409 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
410 if (!GenericComment.empty())
411 return GenericComment;
412
413 // If not, check if we have an immediate operand.
414 if (!Op.isImm())
415 return std::string();
416
417 // And print its corresponding condition code if the immediate is a
418 // predicate.
419 int FirstPredOp = MI.findFirstPredOperandIdx();
420 if (FirstPredOp != (int) OpIdx)
421 return std::string();
422
423 std::string CC = "CC::";
424 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
425 return CC;
426}
427
428bool ARMBaseInstrInfo::PredicateInstruction(
429 MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
430 unsigned Opc = MI.getOpcode();
431 if (isUncondBranchOpcode(Opc)) {
432 MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
433 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
434 .addImm(Pred[0].getImm())
435 .addReg(Pred[1].getReg());
436 return true;
437 }
438
439 int PIdx = MI.findFirstPredOperandIdx();
440 if (PIdx != -1) {
441 MachineOperand &PMO = MI.getOperand(PIdx);
442 PMO.setImm(Pred[0].getImm());
443 MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
444
445 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
446 // IT block. This affects how they are printed.
447 const MCInstrDesc &MCID = MI.getDesc();
448 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
449 assert(MCID.operands()[1].isOptionalDef() &&
450 "CPSR def isn't expected operand");
451 assert((MI.getOperand(1).isDead() ||
452 MI.getOperand(1).getReg() != ARM::CPSR) &&
453 "if conversion tried to stop defining used CPSR");
454 MI.getOperand(1).setReg(ARM::NoRegister);
455 }
456
457 return true;
458 }
459 return false;
460}
461
462bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
463 ArrayRef<MachineOperand> Pred2) const {
464 if (Pred1.size() > 2 || Pred2.size() > 2)
465 return false;
466
467 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
468 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
469 if (CC1 == CC2)
470 return true;
471
472 switch (CC1) {
473 default:
474 return false;
475 case ARMCC::AL:
476 return true;
477 case ARMCC::HS:
478 return CC2 == ARMCC::HI;
479 case ARMCC::LS:
480 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
481 case ARMCC::GE:
482 return CC2 == ARMCC::GT;
483 case ARMCC::LE:
484 return CC2 == ARMCC::LT;
485 }
486}
487
488bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
489 std::vector<MachineOperand> &Pred,
490 bool SkipDead) const {
491 bool Found = false;
492 for (const MachineOperand &MO : MI.operands()) {
493 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
494 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
495 if (ClobbersCPSR || IsCPSR) {
496
497 // Filter out T1 instructions that have a dead CPSR,
498 // allowing IT blocks to be generated containing T1 instructions
499 const MCInstrDesc &MCID = MI.getDesc();
500 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
501 SkipDead)
502 continue;
503
504 Pred.push_back(MO);
505 Found = true;
506 }
507 }
508
509 return Found;
510}
511
512bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
513 for (const auto &MO : MI.operands())
514 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
515 return true;
516 return false;
517}
518
519static bool isEligibleForITBlock(const MachineInstr *MI) {
520 switch (MI->getOpcode()) {
521 default: return true;
522 case ARM::tADC: // ADC (register) T1
523 case ARM::tADDi3: // ADD (immediate) T1
524 case ARM::tADDi8: // ADD (immediate) T2
525 case ARM::tADDrr: // ADD (register) T1
526 case ARM::tAND: // AND (register) T1
527 case ARM::tASRri: // ASR (immediate) T1
528 case ARM::tASRrr: // ASR (register) T1
529 case ARM::tBIC: // BIC (register) T1
530 case ARM::tEOR: // EOR (register) T1
531 case ARM::tLSLri: // LSL (immediate) T1
532 case ARM::tLSLrr: // LSL (register) T1
533 case ARM::tLSRri: // LSR (immediate) T1
534 case ARM::tLSRrr: // LSR (register) T1
535 case ARM::tMUL: // MUL T1
536 case ARM::tMVN: // MVN (register) T1
537 case ARM::tORR: // ORR (register) T1
538 case ARM::tROR: // ROR (register) T1
539 case ARM::tRSB: // RSB (immediate) T1
540 case ARM::tSBC: // SBC (register) T1
541 case ARM::tSUBi3: // SUB (immediate) T1
542 case ARM::tSUBi8: // SUB (immediate) T2
543 case ARM::tSUBrr: // SUB (register) T1
544 return !ARMBaseInstrInfo::isCPSRDefined(*MI);
545 }
546}
547
548/// isPredicable - Return true if the specified instruction can be predicated.
549/// By default, this returns true for every instruction with a
550/// PredicateOperand.
551bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
552 if (!MI.isPredicable())
553 return false;
554
555 if (MI.isBundle())
556 return false;
557
558 if (!isEligibleForITBlock(&MI))
559 return false;
560
561 const MachineFunction *MF = MI.getParent()->getParent();
562 const ARMFunctionInfo *AFI =
563 MF->getInfo<ARMFunctionInfo>();
564
565 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
566 // In their ARM encoding, they can't be encoded in a conditional form.
567 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
568 return false;
569
570 // Make indirect control flow changes unpredicable when SLS mitigation is
571 // enabled.
572 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
573 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
574 return false;
575 if (ST.hardenSlsBlr() && isIndirectCall(MI))
576 return false;
577
578 if (AFI->isThumb2Function()) {
579 if (getSubtarget().restrictIT())
580 return isV8EligibleForIT(&MI);
581 }
582
583 return true;
584}
585
586namespace llvm {
587
588template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
589 for (const MachineOperand &MO : MI->operands()) {
590 if (!MO.isReg() || MO.isUndef() || MO.isUse())
591 continue;
592 if (MO.getReg() != ARM::CPSR)
593 continue;
594 if (!MO.isDead())
595 return false;
596 }
597 // all definitions of CPSR are dead
598 return true;
599}
600
601} // end namespace llvm
602
603/// GetInstSize - Return the size of the specified MachineInstr.
604///
605unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
606 const MachineBasicBlock &MBB = *MI.getParent();
607 const MachineFunction *MF = MBB.getParent();
608 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
609
610 const MCInstrDesc &MCID = MI.getDesc();
611
612 switch (MI.getOpcode()) {
613 default:
614 // Return the size specified in .td file. If there's none, return 0, as we
615 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
616 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
617 // contrast to AArch64 instructions which have a default size of 4 bytes for
618 // example.
619 return MCID.getSize();
620 case TargetOpcode::BUNDLE:
621 return getInstBundleLength(MI);
622 case ARM::CONSTPOOL_ENTRY:
623 case ARM::JUMPTABLE_INSTS:
624 case ARM::JUMPTABLE_ADDRS:
625 case ARM::JUMPTABLE_TBB:
626 case ARM::JUMPTABLE_TBH:
627 // If this machine instr is a constant pool entry, its size is recorded as
628 // operand #2.
629 return MI.getOperand(2).getImm();
630 case ARM::SPACE:
631 return MI.getOperand(1).getImm();
632 case ARM::INLINEASM:
633 case ARM::INLINEASM_BR: {
634 // If this machine instr is an inline asm, measure it.
635 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
636 if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
637 Size = alignTo(Size, 4);
638 return Size;
639 }
640 }
641}
642
643unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
644 unsigned Size = 0;
645 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
646 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
647 while (++I != E && I->isInsideBundle()) {
648 assert(!I->isBundle() && "No nested bundle!");
649 Size += getInstSizeInBytes(*I);
650 }
651 return Size;
652}
653
654void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
655 MachineBasicBlock::iterator I,
656 MCRegister DestReg, bool KillSrc,
657 const ARMSubtarget &Subtarget) const {
658 unsigned Opc = Subtarget.isThumb()
659 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
660 : ARM::MRS;
661
662 MachineInstrBuilder MIB =
663 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
664
665 // There is only 1 A/R class MRS instruction, and it always refers to
666 // APSR. However, there are lots of other possibilities on M-class cores.
667 if (Subtarget.isMClass())
668 MIB.addImm(0x800);
669
670 MIB.add(predOps(ARMCC::AL))
671 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
672}
673
674void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
675 MachineBasicBlock::iterator I,
676 MCRegister SrcReg, bool KillSrc,
677 const ARMSubtarget &Subtarget) const {
678 unsigned Opc = Subtarget.isThumb()
679 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
680 : ARM::MSR;
681
682 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
683
684 if (Subtarget.isMClass())
685 MIB.addImm(0x800);
686 else
687 MIB.addImm(8);
688
689 MIB.addReg(SrcReg, getKillRegState(KillSrc))
690 .add(predOps(ARMCC::AL))
691 .addReg(ARM::CPSR, RegState::ImplicitDefine);
692}
693
694void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
695 MIB.addImm(ARMVCC::None);
696 MIB.addReg(0);
697 MIB.addReg(0); // tp_reg
698}
699
700void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
701 Register DestReg) {
702 addUnpredicatedMveVpredNOp(MIB);
703 MIB.addReg(DestReg, RegState::Undef);
704}
705
706void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
707 MIB.addImm(Cond);
708 MIB.addReg(ARM::VPR, RegState::Implicit);
709 MIB.addReg(0); // tp_reg
710}
711
712void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
713 unsigned Cond, unsigned Inactive) {
714 addPredicatedMveVpredNOp(MIB, Cond);
715 MIB.addReg(Inactive);
716}
717
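// copyPhysReg prefers a single move (MOVr, VMOVS/VMOVD, VORRq, ...) when one
// exists for the register class; larger tuples (D pairs/triples/quads, QQ and
// QQQQ registers, GPR pairs) fall through to the subregister-by-subregister
// copy loop further below.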
718void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
719 MachineBasicBlock::iterator I,
720 const DebugLoc &DL, Register DestReg,
721 Register SrcReg, bool KillSrc,
722 bool RenamableDest,
723 bool RenamableSrc) const {
724 bool GPRDest = ARM::GPRRegClass.contains(DestReg);
725 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
726
727 if (GPRDest && GPRSrc) {
728 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
729 .addReg(SrcReg, getKillRegState(KillSrc))
730 .add(predOps(ARMCC::AL))
731 .add(condCodeOp());
732 return;
733 }
734
735 bool SPRDest = ARM::SPRRegClass.contains(DestReg);
736 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
737
738 unsigned Opc = 0;
739 if (SPRDest && SPRSrc)
740 Opc = ARM::VMOVS;
741 else if (GPRDest && SPRSrc)
742 Opc = ARM::VMOVRS;
743 else if (SPRDest && GPRSrc)
744 Opc = ARM::VMOVSR;
745 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
746 Opc = ARM::VMOVD;
747 else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
748 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
749
750 if (Opc) {
751 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
752 MIB.addReg(SrcReg, getKillRegState(KillSrc));
753 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
754 MIB.addReg(SrcReg, getKillRegState(KillSrc));
755 if (Opc == ARM::MVE_VORR)
756 addUnpredicatedMveVpredROp(MIB, DestReg);
757 else if (Opc != ARM::MQPRCopy)
758 MIB.add(predOps(ARMCC::AL));
759 return;
760 }
761
762 // Handle register classes that require multiple instructions.
763 unsigned BeginIdx = 0;
764 unsigned SubRegs = 0;
765 int Spacing = 1;
766
767 // Use VORRq when possible.
768 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
769 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
770 BeginIdx = ARM::qsub_0;
771 SubRegs = 2;
772 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
773 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
774 BeginIdx = ARM::qsub_0;
775 SubRegs = 4;
776 // Fall back to VMOVD.
777 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
778 Opc = ARM::VMOVD;
779 BeginIdx = ARM::dsub_0;
780 SubRegs = 2;
781 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
782 Opc = ARM::VMOVD;
783 BeginIdx = ARM::dsub_0;
784 SubRegs = 3;
785 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
786 Opc = ARM::VMOVD;
787 BeginIdx = ARM::dsub_0;
788 SubRegs = 4;
789 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
790 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
791 BeginIdx = ARM::gsub_0;
792 SubRegs = 2;
793 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
794 Opc = ARM::VMOVD;
795 BeginIdx = ARM::dsub_0;
796 SubRegs = 2;
797 Spacing = 2;
798 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
799 Opc = ARM::VMOVD;
800 BeginIdx = ARM::dsub_0;
801 SubRegs = 3;
802 Spacing = 2;
803 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
804 Opc = ARM::VMOVD;
805 BeginIdx = ARM::dsub_0;
806 SubRegs = 4;
807 Spacing = 2;
808 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
809 !Subtarget.hasFP64()) {
810 Opc = ARM::VMOVS;
811 BeginIdx = ARM::ssub_0;
812 SubRegs = 2;
813 } else if (SrcReg == ARM::CPSR) {
814 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
815 return;
816 } else if (DestReg == ARM::CPSR) {
817 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
818 return;
819 } else if (DestReg == ARM::VPR) {
820 assert(ARM::GPRRegClass.contains(SrcReg));
821 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
822 .addReg(SrcReg, getKillRegState(KillSrc))
823 .add(predOps(ARMCC::AL));
824 return;
825 } else if (SrcReg == ARM::VPR) {
826 assert(ARM::GPRRegClass.contains(DestReg));
827 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
828 .addReg(SrcReg, getKillRegState(KillSrc))
830 return;
831 } else if (DestReg == ARM::FPSCR_NZCV) {
832 assert(ARM::GPRRegClass.contains(SrcReg));
833 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
834 .addReg(SrcReg, getKillRegState(KillSrc))
836 return;
837 } else if (SrcReg == ARM::FPSCR_NZCV) {
838 assert(ARM::GPRRegClass.contains(DestReg));
839 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
840 .addReg(SrcReg, getKillRegState(KillSrc))
842 return;
843 }
844
845 assert(Opc && "Impossible reg-to-reg copy");
846
847 const TargetRegisterInfo *TRI = &getRegisterInfo();
848 MachineInstrBuilder Mov;
849
850 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
851 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
852 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
853 Spacing = -Spacing;
854 }
855#ifndef NDEBUG
856 SmallSet<unsigned, 4> DstRegs;
857#endif
858 for (unsigned i = 0; i != SubRegs; ++i) {
859 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
860 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
861 assert(Dst && Src && "Bad sub-register");
862#ifndef NDEBUG
863 assert(!DstRegs.count(Src) && "destructive vector copy");
864 DstRegs.insert(Dst);
865#endif
866 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
867 // VORR (NEON or MVE) takes two source operands.
868 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
869 Mov.addReg(Src);
870 }
871 // MVE VORR takes predicate operands in place of an ordinary condition.
872 if (Opc == ARM::MVE_VORR)
873 addUnpredicatedMveVpredROp(Mov, Dst);
874 else
875 Mov = Mov.add(predOps(ARMCC::AL));
876 // MOVr can set CC.
877 if (Opc == ARM::MOVr)
878 Mov = Mov.add(condCodeOp());
879 }
880 // Add implicit super-register defs and kills to the last instruction.
881 Mov->addRegisterDefined(DestReg, TRI);
882 if (KillSrc)
883 Mov->addRegisterKilled(SrcReg, TRI);
884}
885
886std::optional<DestSourcePair>
888 // VMOVRRD is also a copy instruction but it requires
889 // special way of handling. It is more complex copy version
890 // and since that we are not considering it. For recognition
891 // of such instruction isExtractSubregLike MI interface fuction
892 // could be used.
893 // VORRq is considered as a move only if two inputs are
894 // the same register.
895 if (!MI.isMoveReg() ||
896 (MI.getOpcode() == ARM::VORRq &&
897 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
898 return std::nullopt;
899 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
900}
901
902std::optional<ParamLoadedValue>
904 Register Reg) const {
905 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
906 Register DstReg = DstSrcPair->Destination->getReg();
907
908 // TODO: We don't handle cases where the forwarding reg is narrower/wider
909 // than the copy registers. Consider for example:
910 //
911 // s16 = VMOVS s0
912 // s17 = VMOVS s1
913 // call @callee(d0)
914 //
915 // We'd like to describe the call site value of d0 as d8, but this requires
916 // gathering and merging the descriptions for the two VMOVS instructions.
917 //
918 // We also don't handle the reverse situation, where the forwarding reg is
919 // narrower than the copy destination:
920 //
921 // d8 = VMOVD d0
922 // call @callee(s1)
923 //
924 // We need to produce a fragment description (the call site value of s1 is
925 // /not/ just d8).
926 if (DstReg != Reg)
927 return std::nullopt;
928 }
929 return TargetInstrInfo::describeLoadedValue(MI, Reg);
930}
931
933 unsigned Reg,
934 unsigned SubIdx,
935 unsigned State) const {
936 if (!SubIdx)
937 return MIB.addReg(Reg, State);
938
940 return MIB.addReg(getRegisterInfo().getSubReg(Reg, SubIdx), State);
941 return MIB.addReg(Reg, State, SubIdx);
942}
943
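// storeRegToStackSlot picks the spill instruction purely from the spill size
// of the register class (2, 4, 8, 16, 24, 32 or 64 bytes); unknown classes
// hit llvm_unreachable below.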
944void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
945 MachineBasicBlock::iterator I,
946 Register SrcReg, bool isKill, int FI,
947 const TargetRegisterClass *RC,
948 Register VReg,
949 MachineInstr::MIFlag Flags) const {
950 MachineFunction &MF = *MBB.getParent();
951 MachineFrameInfo &MFI = MF.getFrameInfo();
952 Align Alignment = MFI.getObjectAlign(FI);
954
957 MFI.getObjectSize(FI), Alignment);
958
959 switch (TRI.getSpillSize(*RC)) {
960 case 2:
961 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
962 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
963 .addReg(SrcReg, getKillRegState(isKill))
964 .addFrameIndex(FI)
965 .addImm(0)
966 .addMemOperand(MMO)
968 } else
969 llvm_unreachable("Unknown reg class!");
970 break;
971 case 4:
972 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
973 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
974 .addReg(SrcReg, getKillRegState(isKill))
975 .addFrameIndex(FI)
976 .addImm(0)
977 .addMemOperand(MMO)
979 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
980 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
981 .addReg(SrcReg, getKillRegState(isKill))
982 .addFrameIndex(FI)
983 .addImm(0)
984 .addMemOperand(MMO)
986 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
987 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
988 .addReg(SrcReg, getKillRegState(isKill))
989 .addFrameIndex(FI)
990 .addImm(0)
991 .addMemOperand(MMO)
993 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
994 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_FPSCR_NZCVQC_off))
995 .addReg(SrcReg, getKillRegState(isKill))
996 .addFrameIndex(FI)
997 .addImm(0)
998 .addMemOperand(MMO)
1000 } else
1001 llvm_unreachable("Unknown reg class!");
1002 break;
1003 case 8:
1004 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1005 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1006 .addReg(SrcReg, getKillRegState(isKill))
1007 .addFrameIndex(FI)
1008 .addImm(0)
1009 .addMemOperand(MMO)
1011 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1012 if (Subtarget.hasV5TEOps()) {
1013 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1014 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
1015 AddDReg(MIB, SrcReg, ARM::gsub_1, 0);
1016 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1018 } else {
1019 // Fallback to STM instruction, which has existed since the dawn of
1020 // time.
1021 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1022 .addFrameIndex(FI)
1023 .addMemOperand(MMO)
1025 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
1026 AddDReg(MIB, SrcReg, ARM::gsub_1, 0);
1027 }
1028 } else
1029 llvm_unreachable("Unknown reg class!");
1030 break;
1031 case 16:
1032 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1033 // Use aligned spills if the stack can be realigned.
1034 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1035 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1036 .addFrameIndex(FI)
1037 .addImm(16)
1038 .addReg(SrcReg, getKillRegState(isKill))
1039 .addMemOperand(MMO)
1041 } else {
1042 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1043 .addReg(SrcReg, getKillRegState(isKill))
1044 .addFrameIndex(FI)
1045 .addMemOperand(MMO)
1047 }
1048 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1049 Subtarget.hasMVEIntegerOps()) {
1050 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1051 MIB.addReg(SrcReg, getKillRegState(isKill))
1052 .addFrameIndex(FI)
1053 .addImm(0)
1054 .addMemOperand(MMO);
1056 } else
1057 llvm_unreachable("Unknown reg class!");
1058 break;
1059 case 24:
1060 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1061 // Use aligned spills if the stack can be realigned.
1062 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1063 Subtarget.hasNEON()) {
1064 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1065 .addFrameIndex(FI)
1066 .addImm(16)
1067 .addReg(SrcReg, getKillRegState(isKill))
1068 .addMemOperand(MMO)
1070 } else {
1072 get(ARM::VSTMDIA))
1073 .addFrameIndex(FI)
1075 .addMemOperand(MMO);
1076 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1077 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0);
1078 AddDReg(MIB, SrcReg, ARM::dsub_2, 0);
1079 }
1080 } else
1081 llvm_unreachable("Unknown reg class!");
1082 break;
1083 case 32:
1084 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1085 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1086 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1087 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1088 Subtarget.hasNEON()) {
1089 // FIXME: It's possible to only store part of the QQ register if the
1090 // spilled def has a sub-register index.
1091 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1092 .addFrameIndex(FI)
1093 .addImm(16)
1094 .addReg(SrcReg, getKillRegState(isKill))
1095 .addMemOperand(MMO)
1097 } else if (Subtarget.hasMVEIntegerOps()) {
1098 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1099 .addReg(SrcReg, getKillRegState(isKill))
1100 .addFrameIndex(FI)
1101 .addMemOperand(MMO);
1102 } else {
1104 get(ARM::VSTMDIA))
1105 .addFrameIndex(FI)
1107 .addMemOperand(MMO);
1108 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1109 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0);
1110 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0);
1111 AddDReg(MIB, SrcReg, ARM::dsub_3, 0);
1112 }
1113 } else
1114 llvm_unreachable("Unknown reg class!");
1115 break;
1116 case 64:
1117 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1118 Subtarget.hasMVEIntegerOps()) {
1119 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1120 .addReg(SrcReg, getKillRegState(isKill))
1121 .addFrameIndex(FI)
1122 .addMemOperand(MMO);
1123 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1124 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1125 .addFrameIndex(FI)
1127 .addMemOperand(MMO);
1128 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1129 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0);
1130 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0);
1131 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0);
1132 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0);
1133 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0);
1134 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0);
1135 AddDReg(MIB, SrcReg, ARM::dsub_7, 0);
1136 } else
1137 llvm_unreachable("Unknown reg class!");
1138 break;
1139 default:
1140 llvm_unreachable("Unknown reg class!");
1141 }
1142}
1143
1145 int &FrameIndex) const {
1146 switch (MI.getOpcode()) {
1147 default: break;
1148 case ARM::STRrs:
1149 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1150 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1151 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1152 MI.getOperand(3).getImm() == 0) {
1153 FrameIndex = MI.getOperand(1).getIndex();
1154 return MI.getOperand(0).getReg();
1155 }
1156 break;
1157 case ARM::STRi12:
1158 case ARM::t2STRi12:
1159 case ARM::tSTRspi:
1160 case ARM::VSTRD:
1161 case ARM::VSTRS:
1162 case ARM::VSTRH:
1163 case ARM::VSTR_P0_off:
1164 case ARM::VSTR_FPSCR_NZCVQC_off:
1165 case ARM::MVE_VSTRWU32:
1166 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1167 MI.getOperand(2).getImm() == 0) {
1168 FrameIndex = MI.getOperand(1).getIndex();
1169 return MI.getOperand(0).getReg();
1170 }
1171 break;
1172 case ARM::VST1q64:
1173 case ARM::VST1d64TPseudo:
1174 case ARM::VST1d64QPseudo:
1175 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1176 FrameIndex = MI.getOperand(0).getIndex();
1177 return MI.getOperand(2).getReg();
1178 }
1179 break;
1180 case ARM::VSTMQIA:
1181 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1182 FrameIndex = MI.getOperand(1).getIndex();
1183 return MI.getOperand(0).getReg();
1184 }
1185 break;
1186 case ARM::MQQPRStore:
1187 case ARM::MQQQQPRStore:
1188 if (MI.getOperand(1).isFI()) {
1189 FrameIndex = MI.getOperand(1).getIndex();
1190 return MI.getOperand(0).getReg();
1191 }
1192 break;
1193 }
1194
1195 return 0;
1196}
1197
1199 int &FrameIndex) const {
1200 SmallVector<const MachineMemOperand *, 1> Accesses;
1201 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1202 Accesses.size() == 1) {
1203 FrameIndex =
1204 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1205 ->getFrameIndex();
1206 return true;
1207 }
1208 return false;
1209}
1210
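// loadRegFromStackSlot mirrors storeRegToStackSlot above: the reload
// instruction is chosen from the spill size of the register class, using
// aligned NEON loads when the stack can be realigned.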
1213 Register DestReg, int FI,
1214 const TargetRegisterClass *RC,
1215 Register VReg,
1216 MachineInstr::MIFlag Flags) const {
1217 DebugLoc DL;
1218 if (I != MBB.end()) DL = I->getDebugLoc();
1219 MachineFunction &MF = *MBB.getParent();
1220 MachineFrameInfo &MFI = MF.getFrameInfo();
1221 const Align Alignment = MFI.getObjectAlign(FI);
1224 MFI.getObjectSize(FI), Alignment);
1225
1227 switch (TRI.getSpillSize(*RC)) {
1228 case 2:
1229 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1230 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1231 .addFrameIndex(FI)
1232 .addImm(0)
1233 .addMemOperand(MMO)
1235 } else
1236 llvm_unreachable("Unknown reg class!");
1237 break;
1238 case 4:
1239 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1240 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1241 .addFrameIndex(FI)
1242 .addImm(0)
1243 .addMemOperand(MMO)
1245 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1246 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1247 .addFrameIndex(FI)
1248 .addImm(0)
1249 .addMemOperand(MMO)
1251 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1252 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1253 .addFrameIndex(FI)
1254 .addImm(0)
1255 .addMemOperand(MMO)
1257 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
1258 BuildMI(MBB, I, DL, get(ARM::VLDR_FPSCR_NZCVQC_off), DestReg)
1259 .addFrameIndex(FI)
1260 .addImm(0)
1261 .addMemOperand(MMO)
1263 } else
1264 llvm_unreachable("Unknown reg class!");
1265 break;
1266 case 8:
1267 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1268 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1269 .addFrameIndex(FI)
1270 .addImm(0)
1271 .addMemOperand(MMO)
1273 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1275
1276 if (Subtarget.hasV5TEOps()) {
1277 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1278 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
1279 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
1280 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1282 } else {
1283 // Fallback to LDM instruction, which has existed since the dawn of
1284 // time.
1285 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1286 .addFrameIndex(FI)
1287 .addMemOperand(MMO)
1289 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
1290 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
1291 }
1292
1293 if (DestReg.isPhysical())
1294 MIB.addReg(DestReg, RegState::ImplicitDefine);
1295 } else
1296 llvm_unreachable("Unknown reg class!");
1297 break;
1298 case 16:
1299 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1300 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1301 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1302 .addFrameIndex(FI)
1303 .addImm(16)
1304 .addMemOperand(MMO)
1306 } else {
1307 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1308 .addFrameIndex(FI)
1309 .addMemOperand(MMO)
1311 }
1312 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1313 Subtarget.hasMVEIntegerOps()) {
1314 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1315 MIB.addFrameIndex(FI)
1316 .addImm(0)
1317 .addMemOperand(MMO);
1319 } else
1320 llvm_unreachable("Unknown reg class!");
1321 break;
1322 case 24:
1323 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1324 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1325 Subtarget.hasNEON()) {
1326 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1327 .addFrameIndex(FI)
1328 .addImm(16)
1329 .addMemOperand(MMO)
1331 } else {
1332 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1333 .addFrameIndex(FI)
1334 .addMemOperand(MMO)
1336 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1337 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1338 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1339 if (DestReg.isPhysical())
1340 MIB.addReg(DestReg, RegState::ImplicitDefine);
1341 }
1342 } else
1343 llvm_unreachable("Unknown reg class!");
1344 break;
1345 case 32:
1346 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1347 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1348 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1349 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1350 Subtarget.hasNEON()) {
1351 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1352 .addFrameIndex(FI)
1353 .addImm(16)
1354 .addMemOperand(MMO)
1356 } else if (Subtarget.hasMVEIntegerOps()) {
1357 BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1358 .addFrameIndex(FI)
1359 .addMemOperand(MMO);
1360 } else {
1361 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1362 .addFrameIndex(FI)
1364 .addMemOperand(MMO);
1365 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1366 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1367 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1368 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
1369 if (DestReg.isPhysical())
1370 MIB.addReg(DestReg, RegState::ImplicitDefine);
1371 }
1372 } else
1373 llvm_unreachable("Unknown reg class!");
1374 break;
1375 case 64:
1376 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1377 Subtarget.hasMVEIntegerOps()) {
1378 BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1379 .addFrameIndex(FI)
1380 .addMemOperand(MMO);
1381 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1382 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1383 .addFrameIndex(FI)
1385 .addMemOperand(MMO);
1386 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1387 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1388 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1389 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
1390 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead);
1391 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead);
1392 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead);
1393 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead);
1394 if (DestReg.isPhysical())
1395 MIB.addReg(DestReg, RegState::ImplicitDefine);
1396 } else
1397 llvm_unreachable("Unknown reg class!");
1398 break;
1399 default:
1400 llvm_unreachable("Unknown regclass!");
1401 }
1402}
1403
1405 int &FrameIndex) const {
1406 switch (MI.getOpcode()) {
1407 default: break;
1408 case ARM::LDRrs:
1409 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1410 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1411 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1412 MI.getOperand(3).getImm() == 0) {
1413 FrameIndex = MI.getOperand(1).getIndex();
1414 return MI.getOperand(0).getReg();
1415 }
1416 break;
1417 case ARM::LDRi12:
1418 case ARM::t2LDRi12:
1419 case ARM::tLDRspi:
1420 case ARM::VLDRD:
1421 case ARM::VLDRS:
1422 case ARM::VLDRH:
1423 case ARM::VLDR_P0_off:
1424 case ARM::VLDR_FPSCR_NZCVQC_off:
1425 case ARM::MVE_VLDRWU32:
1426 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1427 MI.getOperand(2).getImm() == 0) {
1428 FrameIndex = MI.getOperand(1).getIndex();
1429 return MI.getOperand(0).getReg();
1430 }
1431 break;
1432 case ARM::VLD1q64:
1433 case ARM::VLD1d8TPseudo:
1434 case ARM::VLD1d16TPseudo:
1435 case ARM::VLD1d32TPseudo:
1436 case ARM::VLD1d64TPseudo:
1437 case ARM::VLD1d8QPseudo:
1438 case ARM::VLD1d16QPseudo:
1439 case ARM::VLD1d32QPseudo:
1440 case ARM::VLD1d64QPseudo:
1441 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1442 FrameIndex = MI.getOperand(1).getIndex();
1443 return MI.getOperand(0).getReg();
1444 }
1445 break;
1446 case ARM::VLDMQIA:
1447 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1448 FrameIndex = MI.getOperand(1).getIndex();
1449 return MI.getOperand(0).getReg();
1450 }
1451 break;
1452 case ARM::MQQPRLoad:
1453 case ARM::MQQQQPRLoad:
1454 if (MI.getOperand(1).isFI()) {
1455 FrameIndex = MI.getOperand(1).getIndex();
1456 return MI.getOperand(0).getReg();
1457 }
1458 break;
1459 }
1460
1461 return 0;
1462}
1463
1465 int &FrameIndex) const {
1467 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1468 Accesses.size() == 1) {
1469 FrameIndex =
1470 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1471 ->getFrameIndex();
1472 return true;
1473 }
1474 return false;
1475}
1476
1477/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
1478/// depending on whether the result is used.
1479void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1480 bool isThumb1 = Subtarget.isThumb1Only();
1481 bool isThumb2 = Subtarget.isThumb2();
1482 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1483
1484 DebugLoc dl = MI->getDebugLoc();
1485 MachineBasicBlock *BB = MI->getParent();
1486
1487 MachineInstrBuilder LDM, STM;
1488 if (isThumb1 || !MI->getOperand(1).isDead()) {
1489 MachineOperand LDWb(MI->getOperand(1));
1490 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1491 : isThumb1 ? ARM::tLDMIA_UPD
1492 : ARM::LDMIA_UPD))
1493 .add(LDWb);
1494 } else {
1495 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1496 }
1497
1498 if (isThumb1 || !MI->getOperand(0).isDead()) {
1499 MachineOperand STWb(MI->getOperand(0));
1500 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1501 : isThumb1 ? ARM::tSTMIA_UPD
1502 : ARM::STMIA_UPD))
1503 .add(STWb);
1504 } else {
1505 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1506 }
1507
1508 MachineOperand LDBase(MI->getOperand(3));
1509 LDM.add(LDBase).add(predOps(ARMCC::AL));
1510
1511 MachineOperand STBase(MI->getOperand(2));
1512 STM.add(STBase).add(predOps(ARMCC::AL));
1513
1514 // Sort the scratch registers into ascending order.
1515 const TargetRegisterInfo &TRI = getRegisterInfo();
1516 SmallVector<unsigned, 6> ScratchRegs;
1517 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1518 ScratchRegs.push_back(MO.getReg());
1519 llvm::sort(ScratchRegs,
1520 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1521 return TRI.getEncodingValue(Reg1) <
1522 TRI.getEncodingValue(Reg2);
1523 });
1524
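// Each scratch register is then appended to both the LDM (as a def) and the
// STM (as a use), so the registers are transferred in ascending encoding
// order, matching the register-list ordering LDM/STM require.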
1525 for (const auto &Reg : ScratchRegs) {
1526 LDM.addReg(Reg, RegState::Define);
1527 STM.addReg(Reg, RegState::Kill);
1528 }
1529
1530 BB->erase(MI);
1531}
1532
1533bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1534 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1535 expandLoadStackGuard(MI);
1536 MI.getParent()->erase(MI);
1537 return true;
1538 }
1539
1540 if (MI.getOpcode() == ARM::MEMCPY) {
1541 expandMEMCPY(MI);
1542 return true;
1543 }
1544
1545 // This hook gets to expand COPY instructions before they become
1546 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1547 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1548 // changed into a VORR that can go down the NEON pipeline.
1549 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1550 return false;
1551
1552 // Look for a copy between even S-registers. That is where we keep floats
1553 // when using NEON v2f32 instructions for f32 arithmetic.
1554 Register DstRegS = MI.getOperand(0).getReg();
1555 Register SrcRegS = MI.getOperand(1).getReg();
1556 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1557 return false;
1558
1560 MCRegister DstRegD =
1561 TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, &ARM::DPRRegClass);
1562 MCRegister SrcRegD =
1563 TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, &ARM::DPRRegClass);
1564 if (!DstRegD || !SrcRegD)
1565 return false;
1566
1567 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1568 // legal if the COPY already defines the full DstRegD, and it isn't a
1569 // sub-register insertion.
1570 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1571 return false;
1572
1573 // A dead copy shouldn't show up here, but reject it just in case.
1574 if (MI.getOperand(0).isDead())
1575 return false;
1576
1577 // All clear, widen the COPY.
1578 LLVM_DEBUG(dbgs() << "widening: " << MI);
1579 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1580
1581 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1582 // or some other super-register.
1583 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1584 if (ImpDefIdx != -1)
1585 MI.removeOperand(ImpDefIdx);
1586
1587 // Change the opcode and operands.
1588 MI.setDesc(get(ARM::VMOVD));
1589 MI.getOperand(0).setReg(DstRegD);
1590 MI.getOperand(1).setReg(SrcRegD);
1591 MIB.add(predOps(ARMCC::AL));
1592
1593 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1594 // register scavenger and machine verifier, so we need to indicate that we
1595 // are reading an undefined value from SrcRegD, but a proper value from
1596 // SrcRegS.
1597 MI.getOperand(1).setIsUndef();
1598 MIB.addReg(SrcRegS, RegState::Implicit);
1599
1600 // SrcRegD may actually contain an unrelated value in the ssub_1
1601 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1602 if (MI.getOperand(1).isKill()) {
1603 MI.getOperand(1).setIsKill(false);
1604 MI.addRegisterKilled(SrcRegS, TRI, true);
1605 }
1606
1607 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1608 return true;
1609}
1610
1611/// Create a copy of a const pool value. Update CPI to the new index and return
1612/// the label UID.
1613static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1614 MachineConstantPool *MCP = MF.getConstantPool();
1615 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1616
1617 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1618 assert(MCPE.isMachineConstantPoolEntry() &&
1619 "Expecting a machine constantpool entry!");
1620 ARMConstantPoolValue *ACPV =
1621 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1622
1623 unsigned PCLabelId = AFI->createPICLabelUId();
1624 ARMConstantPoolValue *NewCPV = nullptr;
1625
1626 // FIXME: The below assumes PIC relocation model and that the function
1627 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1628 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1629 // instructions, so that's probably OK, but is PIC always correct when
1630 // we get here?
1631 if (ACPV->isGlobalValue())
1633 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1634 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1635 else if (ACPV->isExtSymbol())
1638 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1639 else if (ACPV->isBlockAddress())
1641 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1643 else if (ACPV->isLSDA())
1644 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1645 ARMCP::CPLSDA, 4);
1646 else if (ACPV->isMachineBasicBlock())
1647 NewCPV = ARMConstantPoolMBB::
1649 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1650 else
1651 llvm_unreachable("Unexpected ARM constantpool value type!!");
1652 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1653 return PCLabelId;
1654}
1655
1656void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1657 MachineBasicBlock::iterator I,
1658 Register DestReg, unsigned SubIdx,
1659 const MachineInstr &Orig) const {
1660 unsigned Opcode = Orig.getOpcode();
1661 switch (Opcode) {
1662 default: {
1663 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1664 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1665 MBB.insert(I, MI);
1666 break;
1667 }
1668 case ARM::tLDRpci_pic:
1669 case ARM::t2LDRpci_pic: {
1670 MachineFunction &MF = *MBB.getParent();
1671 unsigned CPI = Orig.getOperand(1).getIndex();
1672 unsigned PCLabelId = duplicateCPV(MF, CPI);
1673 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1674 .addConstantPoolIndex(CPI)
1675 .addImm(PCLabelId)
1676 .cloneMemRefs(Orig);
1677 break;
1678 }
1679 }
1680}
1681
1682MachineInstr &
1683ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1684 MachineBasicBlock::iterator InsertBefore,
1685 const MachineInstr &Orig) const {
1686 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1687 MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1688 for (;;) {
1689 switch (I->getOpcode()) {
1690 case ARM::tLDRpci_pic:
1691 case ARM::t2LDRpci_pic: {
1692 MachineFunction &MF = *MBB.getParent();
1693 unsigned CPI = I->getOperand(1).getIndex();
1694 unsigned PCLabelId = duplicateCPV(MF, CPI);
1695 I->getOperand(1).setIndex(CPI);
1696 I->getOperand(2).setImm(PCLabelId);
1697 break;
1698 }
1699 }
1700 if (!I->isBundledWithSucc())
1701 break;
1702 ++I;
1703 }
1704 return Cloned;
1705}
1706
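// produceSameValue: two PC-relative constant-pool loads (or pc-relative
// global-address moves) are considered to produce the same value when they
// reference the same global or an identical constant-pool entry; the PC label
// ids themselves are ignored.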
1707bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1708 const MachineInstr &MI1,
1709 const MachineRegisterInfo *MRI) const {
1710 unsigned Opcode = MI0.getOpcode();
1711 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1712 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1713 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1714 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1715 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1716 Opcode == ARM::t2MOV_ga_pcrel) {
1717 if (MI1.getOpcode() != Opcode)
1718 return false;
1719 if (MI0.getNumOperands() != MI1.getNumOperands())
1720 return false;
1721
1722 const MachineOperand &MO0 = MI0.getOperand(1);
1723 const MachineOperand &MO1 = MI1.getOperand(1);
1724 if (MO0.getOffset() != MO1.getOffset())
1725 return false;
1726
1727 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1728 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1729 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1730 Opcode == ARM::t2MOV_ga_pcrel)
1731 // Ignore the PC labels.
1732 return MO0.getGlobal() == MO1.getGlobal();
1733
1734 const MachineFunction *MF = MI0.getParent()->getParent();
1735 const MachineConstantPool *MCP = MF->getConstantPool();
1736 int CPI0 = MO0.getIndex();
1737 int CPI1 = MO1.getIndex();
1738 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1739 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1740 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1741 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1742 if (isARMCP0 && isARMCP1) {
1743 ARMConstantPoolValue *ACPV0 =
1744 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1745 ARMConstantPoolValue *ACPV1 =
1746 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1747 return ACPV0->hasSameValue(ACPV1);
1748 } else if (!isARMCP0 && !isARMCP1) {
1749 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1750 }
1751 return false;
1752 } else if (Opcode == ARM::PICLDR) {
1753 if (MI1.getOpcode() != Opcode)
1754 return false;
1755 if (MI0.getNumOperands() != MI1.getNumOperands())
1756 return false;
1757
1758 Register Addr0 = MI0.getOperand(1).getReg();
1759 Register Addr1 = MI1.getOperand(1).getReg();
1760 if (Addr0 != Addr1) {
1761 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1762 return false;
1763
1764 // This assumes SSA form.
1765 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1766 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1767 // Check if the loaded value, e.g. a constantpool of a global address, are
1768 // the same.
1769 if (!produceSameValue(*Def0, *Def1, MRI))
1770 return false;
1771 }
1772
1773 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1774 // %12 = PICLDR %11, 0, 14, %noreg
1775 const MachineOperand &MO0 = MI0.getOperand(i);
1776 const MachineOperand &MO1 = MI1.getOperand(i);
1777 if (!MO0.isIdenticalTo(MO1))
1778 return false;
1779 }
1780 return true;
1781 }
1782
1783 return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1784}
1785
1786/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1787/// determine if two loads are loading from the same base address. It should
1788/// only return true if the base pointers are the same and the only differences
1789/// between the two addresses is the offset. It also returns the offsets by
1790/// reference.
1791///
1792/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1793/// is permanently disabled.
1794bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1795 int64_t &Offset1,
1796 int64_t &Offset2) const {
1797 // Don't worry about Thumb: just ARM and Thumb2.
1798 if (Subtarget.isThumb1Only()) return false;
1799
1800 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1801 return false;
1802
1803 auto IsLoadOpcode = [&](unsigned Opcode) {
1804 switch (Opcode) {
1805 default:
1806 return false;
1807 case ARM::LDRi12:
1808 case ARM::LDRBi12:
1809 case ARM::LDRD:
1810 case ARM::LDRH:
1811 case ARM::LDRSB:
1812 case ARM::LDRSH:
1813 case ARM::VLDRD:
1814 case ARM::VLDRS:
1815 case ARM::t2LDRi8:
1816 case ARM::t2LDRBi8:
1817 case ARM::t2LDRDi8:
1818 case ARM::t2LDRSHi8:
1819 case ARM::t2LDRi12:
1820 case ARM::t2LDRBi12:
1821 case ARM::t2LDRSHi12:
1822 return true;
1823 }
1824 };
1825
1826 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1827 !IsLoadOpcode(Load2->getMachineOpcode()))
1828 return false;
1829
1830 // Check if base addresses and chain operands match.
1831 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1832 Load1->getOperand(4) != Load2->getOperand(4))
1833 return false;
1834
1835 // Index should be Reg0.
1836 if (Load1->getOperand(3) != Load2->getOperand(3))
1837 return false;
1838
1839 // Determine the offsets.
1840 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1841 isa<ConstantSDNode>(Load2->getOperand(1))) {
1842 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1843 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1844 return true;
1845 }
1846
1847 return false;
1848}
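// For example, two loads of the same machine opcode (say LDRi12) whose base
// and chain operands (operands 0 and 4) match, whose index operand (operand 3)
// is the same, and whose offsets are the constants 0 and 4 make this return
// true with Offset1 = 0 and Offset2 = 4; a register offset on either load
// makes it return false.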
1849
1850/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1851/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1852/// be scheduled together. On some targets, if two loads are loading from
1853/// addresses in the same cache line, it's better if they are scheduled
1854/// together. This function takes two integers that represent the load offsets
1855/// from the common base address. It returns true if it decides it's desirable
1856/// to schedule the two loads together. "NumLoads" is the number of loads that
1857/// have already been scheduled after Load1.
1858///
1859/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1860/// is permanently disabled.
1861bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1862 int64_t Offset1, int64_t Offset2,
1863 unsigned NumLoads) const {
1864 // Don't worry about Thumb: just ARM and Thumb2.
1865 if (Subtarget.isThumb1Only()) return false;
1866
1867 assert(Offset2 > Offset1);
1868
1869 if ((Offset2 - Offset1) / 8 > 64)
1870 return false;
1871
1872 // Check if the machine opcodes are different. If they are different
1873 // then we consider the loads not to share a base address, EXCEPT in the
1874 // case of Thumb2 byte loads where one is t2LDRBi8 and the other t2LDRBi12.
1875 // Those are considered the same because they are just different encoding
1876 // forms of the same basic instruction.
1877 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1878 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1879 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1880 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1881 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1882 return false; // FIXME: overly conservative?
1883
1884 // Four loads in a row should be sufficient.
1885 if (NumLoads >= 3)
1886 return false;
1887
1888 return true;
1889}
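// For instance, assuming the two loads share an opcode, offsets 0 and 8 with
// no loads scheduled yet (NumLoads == 0) return true, while offsets 0 and
// 1024 fail the (Offset2 - Offset1) / 8 > 64 check, and a fourth load
// (NumLoads >= 3) is always refused.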
1890
1891bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1892 const MachineBasicBlock *MBB,
1893 const MachineFunction &MF) const {
1894 // Debug info is never a scheduling boundary. It's necessary to be explicit
1895 // due to the special treatment of IT instructions below, otherwise a
1896 // dbg_value followed by an IT will result in the IT instruction being
1897 // considered a scheduling hazard, which is wrong. It should be the actual
1898 // instruction preceding the dbg_value instruction(s), just like it is
1899 // when debug info is not present.
1900 if (MI.isDebugInstr())
1901 return false;
1902
1903 // Terminators and labels can't be scheduled around.
1904 if (MI.isTerminator() || MI.isPosition())
1905 return true;
1906
1907 // INLINEASM_BR can jump to another block
1908 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
1909 return true;
1910
1911 if (isSEHInstruction(MI))
1912 return true;
1913
1914 // Treat the start of the IT block as a scheduling boundary, but schedule
1915 // t2IT along with all instructions following it.
1916 // FIXME: This is a big hammer. But the alternative is to add all potential
1917 // true and anti dependencies to IT block instructions as implicit operands
1918 // to the t2IT instruction. The added compile time and complexity does not
1919 // seem worth it.
1920 MachineBasicBlock::const_iterator I = MI;
1921 // Make sure to skip any debug instructions
1922 while (++I != MBB->end() && I->isDebugInstr())
1923 ;
1924 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1925 return true;
1926
1927 // Don't attempt to schedule around any instruction that defines
1928 // a stack-oriented pointer, as it's unlikely to be profitable. This
1929 // saves compile time, because it doesn't require every single
1930 // stack slot reference to depend on the instruction that does the
1931 // modification.
1932 // Calls don't actually change the stack pointer, even if they have imp-defs.
1933 // No ARM calling conventions change the stack pointer. (X86 calling
1934 // conventions sometimes do).
1935 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
1936 return true;
1937
1938 return false;
1939}
1940
1941bool ARMBaseInstrInfo::
1942isProfitableToIfCvt(MachineBasicBlock &MBB,
1943 unsigned NumCycles, unsigned ExtraPredCycles,
1944 BranchProbability Probability) const {
1945 if (!NumCycles)
1946 return false;
1947
1948 // If we are optimizing for size, see if the branch in the predecessor can be
1949 // lowered to cbn?z by the constant island lowering pass, and return false if
1950 // so. This results in a shorter instruction sequence.
1951 if (MBB.getParent()->getFunction().hasOptSize()) {
1952 MachineBasicBlock *Pred = *MBB.pred_begin();
1953 if (!Pred->empty()) {
1954 MachineInstr *LastMI = &*Pred->rbegin();
1955 if (LastMI->getOpcode() == ARM::t2Bcc) {
1957 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
1958 if (CmpMI)
1959 return false;
1960 }
1961 }
1962 }
1963 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1964 MBB, 0, 0, Probability);
1965}
1966
1967bool ARMBaseInstrInfo::
1968isProfitableToIfCvt(MachineBasicBlock &TBB,
1969 unsigned TCycles, unsigned TExtra,
1970 MachineBasicBlock &FBB,
1971 unsigned FCycles, unsigned FExtra,
1972 BranchProbability Probability) const {
1973 if (!TCycles)
1974 return false;
1975
1976 // In Thumb2 code we often end up trading one branch for an IT block, and
1977 // if we are cloning, the instructions can increase code size. Prevent
1978 // blocks with multiple predecessors from being if-converted to avoid this
1979 // cloning.
1980 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
1981 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
1982 return false;
1983 }
1984
1985 // Attempt to estimate the relative costs of predication versus branching.
1986 // Here we scale up each component of UnpredCost to avoid precision issues when
1987 // scaling TCycles/FCycles by Probability.
1988 const unsigned ScalingUpFactor = 1024;
1989
1990 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1991 unsigned UnpredCost;
1992 if (!Subtarget.hasBranchPredictor()) {
1993 // When we don't have a branch predictor it's always cheaper to not take a
1994 // branch than take it, so we have to take that into account.
1995 unsigned NotTakenBranchCost = 1;
1996 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1997 unsigned TUnpredCycles, FUnpredCycles;
1998 if (!FCycles) {
1999 // Triangle: TBB is the fallthrough
2000 TUnpredCycles = TCycles + NotTakenBranchCost;
2001 FUnpredCycles = TakenBranchCost;
2002 } else {
2003 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2004 TUnpredCycles = TCycles + TakenBranchCost;
2005 FUnpredCycles = FCycles + NotTakenBranchCost;
2006 // The branch at the end of FBB will disappear when it's predicated, so
2007 // discount it from PredCost.
2008 PredCost -= 1 * ScalingUpFactor;
2009 }
2010 // The total cost is the cost of each path scaled by its probability.
2011 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2012 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2013 UnpredCost = TUnpredCost + FUnpredCost;
2014 // When predicating, assume that the first IT can be folded away but later
2015 // ones cost one cycle each.
2016 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2017 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2018 }
2019 } else {
2020 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2021 unsigned FUnpredCost =
2022 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2023 UnpredCost = TUnpredCost + FUnpredCost;
2024 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2025 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2026 }
2027
2028 return PredCost <= UnpredCost;
2029}
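// Worked example for the branch-predictor path: with TCycles = FCycles = 1,
// no extra predication cycles, a 50% probability and, say, a misprediction
// penalty of 8 cycles, PredCost = (1 + 1) * 1024 = 2048 while
// UnpredCost = 512 + 512 + 1024 + 8 * 1024 / 10 = 2867, so predication is
// reported as profitable.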
2030
2031unsigned
2032ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2033 unsigned NumInsts) const {
2034 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2035 // ARM has a condition code field in every predicable instruction; using it
2036 // doesn't change code size.
2037 if (!Subtarget.isThumb2())
2038 return 0;
2039
2040 // It's possible that the size of the IT is restricted to a single block.
2041 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2042 return divideCeil(NumInsts, MaxInsts) * 2;
2043}
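// For example, predicating 6 instructions costs divideCeil(6, 4) = 2 IT
// instructions (4 bytes); under restrictIT each IT covers a single
// instruction, so the same 6 instructions would cost 12 bytes.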
2044
2045unsigned
2046ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2047 // If this branch is likely to be folded into the comparison to form a
2048 // CB(N)Z, then removing it won't reduce code size at all, because that will
2049 // just replace the CB(N)Z with a CMP.
2050 if (MI.getOpcode() == ARM::t2Bcc &&
2052 return 0;
2053
2054 unsigned Size = getInstSizeInBytes(MI);
2055
2056 // For Thumb2, all branches are 32-bit instructions during the if conversion
2057 // pass, but may be replaced with 16-bit instructions during size reduction.
2058 // Since the branches considered by if conversion tend to be forward branches
2059 // over small basic blocks, they are very likely to be in range for the
2060 // narrow instructions, so we assume the final code size will be half what it
2061 // currently is.
2062 if (Subtarget.isThumb2())
2063 Size /= 2;
2064
2065 return Size;
2066}
2067
2068bool
2069ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2070 MachineBasicBlock &FMBB) const {
2071 // Reduce false anti-dependencies to let the target's out-of-order execution
2072 // engine do its thing.
2073 return Subtarget.isProfitableToUnpredicate();
2074}
2075
2076/// getInstrPredicate - If the instruction is predicated, returns its predicate
2077/// condition, otherwise returns AL. It also returns the condition code
2078/// register by reference.
2079ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2080 Register &PredReg) {
2081 int PIdx = MI.findFirstPredOperandIdx();
2082 if (PIdx == -1) {
2083 PredReg = 0;
2084 return ARMCC::AL;
2085 }
2086
2087 PredReg = MI.getOperand(PIdx+1).getReg();
2088 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2089}
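// For example, for an instruction whose predicate operands are
// (ARMCC::EQ, $cpsr) this returns ARMCC::EQ and sets PredReg to CPSR; for an
// unpredicated instruction it returns ARMCC::AL and sets PredReg to 0.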
2090
2091unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2092 if (Opc == ARM::B)
2093 return ARM::Bcc;
2094 if (Opc == ARM::tB)
2095 return ARM::tBcc;
2096 if (Opc == ARM::t2B)
2097 return ARM::t2Bcc;
2098
2099 llvm_unreachable("Unknown unconditional branch opcode!");
2100}
2101
2102MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2103 bool NewMI,
2104 unsigned OpIdx1,
2105 unsigned OpIdx2) const {
2106 switch (MI.getOpcode()) {
2107 case ARM::MOVCCr:
2108 case ARM::t2MOVCCr: {
2109 // MOVCC can be commuted by inverting the condition.
2110 Register PredReg;
2111 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2112 // MOVCC AL can't be inverted. Shouldn't happen.
2113 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2114 return nullptr;
2115 MachineInstr *CommutedMI =
2116 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2117 if (!CommutedMI)
2118 return nullptr;
2119 // After swapping the MOVCC operands, also invert the condition.
2120 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2121 .setImm(ARMCC::getOppositeCondition(CC));
2122 return CommutedMI;
2123 }
2124 }
2125 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2126}
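// For example, commuting a hypothetical
//   %3:gpr = MOVCCr %1:gpr, %2:gpr, 0 /* eq */, $cpsr
// swaps the two register operands and inverts the predicate, giving
//   %3:gpr = MOVCCr %2:gpr, %1:gpr, 1 /* ne */, $cpsr
// which selects the same values.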
2127
2128/// Identify instructions that can be folded into a MOVCC instruction, and
2129/// return the defining instruction.
2130MachineInstr *
2131ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2132 const TargetInstrInfo *TII) const {
2133 if (!Reg.isVirtual())
2134 return nullptr;
2135 if (!MRI.hasOneNonDBGUse(Reg))
2136 return nullptr;
2137 MachineInstr *MI = MRI.getVRegDef(Reg);
2138 if (!MI)
2139 return nullptr;
2140 // Check if MI can be predicated and folded into the MOVCC.
2141 if (!isPredicable(*MI))
2142 return nullptr;
2143 // Check if MI has any non-dead defs or physreg uses. This also detects
2144 // predicated instructions which will be reading CPSR.
2145 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2146 // Reject frame index operands, PEI can't handle the predicated pseudos.
2147 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2148 return nullptr;
2149 if (!MO.isReg())
2150 continue;
2151 // MI can't have any tied operands, that would conflict with predication.
2152 if (MO.isTied())
2153 return nullptr;
2154 if (MO.getReg().isPhysical())
2155 return nullptr;
2156 if (MO.isDef() && !MO.isDead())
2157 return nullptr;
2158 }
2159 bool DontMoveAcrossStores = true;
2160 if (!MI->isSafeToMove(DontMoveAcrossStores))
2161 return nullptr;
2162 return MI;
2163}
2164
2165bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2166 SmallVectorImpl<MachineOperand> &Cond,
2167 unsigned &TrueOp, unsigned &FalseOp,
2168 bool &Optimizable) const {
2169 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2170 "Unknown select instruction");
2171 // MOVCC operands:
2172 // 0: Def.
2173 // 1: True use.
2174 // 2: False use.
2175 // 3: Condition code.
2176 // 4: CPSR use.
2177 TrueOp = 1;
2178 FalseOp = 2;
2179 Cond.push_back(MI.getOperand(3));
2180 Cond.push_back(MI.getOperand(4));
2181 // We can always fold a def.
2182 Optimizable = true;
2183 return false;
2184}
2185
2189 bool PreferFalse) const {
2190 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2191 "Unknown select instruction");
2192 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2193 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2194 bool Invert = !DefMI;
2195 if (!DefMI)
2196 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2197 if (!DefMI)
2198 return nullptr;
2199
2200 // Find new register class to use.
2201 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2202 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2203 Register DestReg = MI.getOperand(0).getReg();
2204 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2205 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2206 if (!MRI.constrainRegClass(DestReg, FalseClass))
2207 return nullptr;
2208 if (!MRI.constrainRegClass(DestReg, TrueClass))
2209 return nullptr;
2210
2211 // Create a new predicated version of DefMI.
2212 // Rfalse is the first use.
2213 MachineInstrBuilder NewMI =
2214 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2215
2216 // Copy all the DefMI operands, excluding its (null) predicate.
2217 const MCInstrDesc &DefDesc = DefMI->getDesc();
2218 for (unsigned i = 1, e = DefDesc.getNumOperands();
2219 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2220 NewMI.add(DefMI->getOperand(i));
2221
2222 unsigned CondCode = MI.getOperand(3).getImm();
2223 if (Invert)
2224 NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2225 else
2226 NewMI.addImm(CondCode);
2227 NewMI.add(MI.getOperand(4));
2228
2229 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2230 if (NewMI->hasOptionalDef())
2231 NewMI.add(condCodeOp());
2232
2233 // The output register value when the predicate is false is an implicit
2234 // register operand tied to the first def.
2235 // The tie makes the register allocator ensure the FalseReg is allocated the
2236 // same register as operand 0.
2237 FalseReg.setImplicit();
2238 NewMI.add(FalseReg);
2239 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2240
2241 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2242 SeenMIs.insert(NewMI);
2243 SeenMIs.erase(DefMI);
2244
2245 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2246 // DefMI would be invalid when transferred inside the loop. Checking for a
2247 // loop is expensive, but at least remove kill flags if they are in different
2248 // BBs.
2249 if (DefMI->getParent() != MI.getParent())
2250 NewMI->clearKillInfo();
2251
2252 // The caller will erase MI, but not DefMI.
2253 DefMI->eraseFromParent();
2254 return NewMI;
2255}
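// Roughly, for a sequence such as
//   %1:gpr = ADDri %0:gpr, 1, 14 /* al */, $noreg, $noreg
//   %2:gpr = MOVCCr %3:gpr, %1:gpr, 0 /* eq */, $cpsr
// this emits a single conditionally executed add,
//   %2:gpr = ADDri %0:gpr, 1, 0 /* eq */, $cpsr, $noreg, implicit %3:gpr
// with the implicit false value tied to the def so %2 and %3 get the same
// physical register, and then erases the original ADDri.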
2256
2257/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2258/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2259/// def operand.
2260///
2261/// This will go away once we can teach tblgen how to set the optional CPSR def
2262/// operand itself.
2263struct AddSubFlagsOpcodePair {
2264 uint16_t PseudoOpc;
2265 uint16_t MachineOpc;
2266};
2267
2268static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2269 {ARM::ADDSri, ARM::ADDri},
2270 {ARM::ADDSrr, ARM::ADDrr},
2271 {ARM::ADDSrsi, ARM::ADDrsi},
2272 {ARM::ADDSrsr, ARM::ADDrsr},
2273
2274 {ARM::SUBSri, ARM::SUBri},
2275 {ARM::SUBSrr, ARM::SUBrr},
2276 {ARM::SUBSrsi, ARM::SUBrsi},
2277 {ARM::SUBSrsr, ARM::SUBrsr},
2278
2279 {ARM::RSBSri, ARM::RSBri},
2280 {ARM::RSBSrsi, ARM::RSBrsi},
2281 {ARM::RSBSrsr, ARM::RSBrsr},
2282
2283 {ARM::tADDSi3, ARM::tADDi3},
2284 {ARM::tADDSi8, ARM::tADDi8},
2285 {ARM::tADDSrr, ARM::tADDrr},
2286 {ARM::tADCS, ARM::tADC},
2287
2288 {ARM::tSUBSi3, ARM::tSUBi3},
2289 {ARM::tSUBSi8, ARM::tSUBi8},
2290 {ARM::tSUBSrr, ARM::tSUBrr},
2291 {ARM::tSBCS, ARM::tSBC},
2292 {ARM::tRSBS, ARM::tRSB},
2293 {ARM::tLSLSri, ARM::tLSLri},
2294
2295 {ARM::t2ADDSri, ARM::t2ADDri},
2296 {ARM::t2ADDSrr, ARM::t2ADDrr},
2297 {ARM::t2ADDSrs, ARM::t2ADDrs},
2298
2299 {ARM::t2SUBSri, ARM::t2SUBri},
2300 {ARM::t2SUBSrr, ARM::t2SUBrr},
2301 {ARM::t2SUBSrs, ARM::t2SUBrs},
2302
2303 {ARM::t2RSBSri, ARM::t2RSBri},
2304 {ARM::t2RSBSrs, ARM::t2RSBrs},
2305};
2306
2307unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2308 for (const auto &Entry : AddSubFlagsOpcodeMap)
2309 if (OldOpc == Entry.PseudoOpc)
2310 return Entry.MachineOpc;
2311 return 0;
2312}
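// For example, convertAddSubFlagsOpcode(ARM::ADDSri) yields ARM::ADDri;
// opcodes not present in the table yield 0.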
2313
2314void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2315 MachineBasicBlock::iterator &MBBI,
2316 const DebugLoc &dl, Register DestReg,
2317 Register BaseReg, int NumBytes,
2318 ARMCC::CondCodes Pred, Register PredReg,
2319 const ARMBaseInstrInfo &TII,
2320 unsigned MIFlags) {
2321 if (NumBytes == 0 && DestReg != BaseReg) {
2322 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2323 .addReg(BaseReg, RegState::Kill)
2324 .add(predOps(Pred, PredReg))
2325 .add(condCodeOp())
2326 .setMIFlags(MIFlags);
2327 return;
2328 }
2329
2330 bool isSub = NumBytes < 0;
2331 if (isSub) NumBytes = -NumBytes;
2332
2333 while (NumBytes) {
2334 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2335 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2336 assert(ThisVal && "Didn't extract field correctly");
2337
2338 // We will handle these bits from offset, clear them.
2339 NumBytes &= ~ThisVal;
2340
2341 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2342
2343 // Build the new ADD / SUB.
2344 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2345 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2346 .addReg(BaseReg, RegState::Kill)
2347 .addImm(ThisVal)
2348 .add(predOps(Pred, PredReg))
2349 .add(condCodeOp())
2350 .setMIFlags(MIFlags);
2351 BaseReg = DestReg;
2352 }
2353}
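// Rough example: NumBytes = 0x10004 is not a single valid modified immediate,
// so the loop above emits two adds,
//   ADDri DestReg, BaseReg, 4
//   ADDri DestReg, DestReg, 65536
// each covering one encodable chunk of the offset.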
2354
2355bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2356 MachineFunction &MF, MachineInstr *MI,
2357 unsigned NumBytes) {
2358 // This optimisation potentially adds lots of load and store
2359 // micro-operations; it's really only a benefit for code size.
2360 if (!Subtarget.hasMinSize())
2361 return false;
2362
2363 // If only one register is pushed/popped, LLVM can use an LDR/STR
2364 // instead. We can't modify those so make sure we're dealing with an
2365 // instruction we understand.
2366 bool IsPop = isPopOpcode(MI->getOpcode());
2367 bool IsPush = isPushOpcode(MI->getOpcode());
2368 if (!IsPush && !IsPop)
2369 return false;
2370
2371 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2372 MI->getOpcode() == ARM::VLDMDIA_UPD;
2373 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2374 MI->getOpcode() == ARM::tPOP ||
2375 MI->getOpcode() == ARM::tPOP_RET;
2376
2377 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2378 MI->getOperand(1).getReg() == ARM::SP)) &&
2379 "trying to fold sp update into non-sp-updating push/pop");
2380
2381 // The VFP push & pop act on D-registers, so we can only correctly fold in
2382 // an adjustment that is a multiple of 8 bytes. Similarly, each rN is 4 bytes.
2383 // Don't try if this is violated.
2384 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2385 return false;
2386
2387 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2388 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2389 int RegListIdx = IsT1PushPop ? 2 : 4;
2390
2391 // Calculate the space we'll need in terms of registers.
2392 unsigned RegsNeeded;
2393 const TargetRegisterClass *RegClass;
2394 if (IsVFPPushPop) {
2395 RegsNeeded = NumBytes / 8;
2396 RegClass = &ARM::DPRRegClass;
2397 } else {
2398 RegsNeeded = NumBytes / 4;
2399 RegClass = &ARM::GPRRegClass;
2400 }
2401
2402 // We're going to have to strip all list operands off before
2403 // re-adding them since the order matters, so save the existing ones
2404 // for later.
2405 SmallVector<MachineOperand, 4> RegList;
2406
2407 // We're also going to need the first register transferred by this
2408 // instruction, which won't necessarily be the first register in the list.
2409 unsigned FirstRegEnc = -1;
2410
2412 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2413 MachineOperand &MO = MI->getOperand(i);
2414 RegList.push_back(MO);
2415
2416 if (MO.isReg() && !MO.isImplicit() &&
2417 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2418 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2419 }
2420
2421 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2422
2423 // Now try to find enough space in the reglist to allocate NumBytes.
2424 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2425 --CurRegEnc) {
2426 MCRegister CurReg = RegClass->getRegister(CurRegEnc);
2427 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2428 continue;
2429 if (!IsPop) {
2430 // Pushing any register is completely harmless; mark the register involved
2431 // as undef since we don't care about its value and must not restore it
2432 // during stack unwinding.
2433 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2434 false, false, true));
2435 --RegsNeeded;
2436 continue;
2437 }
2438
2439 // However, we can only pop an extra register if it's not live. For
2440 // registers live within the function we might clobber a return value
2441 // register; the other way a register can be live here is if it's
2442 // callee-saved.
2443 if (isCalleeSavedRegister(CurReg, CSRegs) ||
2444 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2446 // VFP pops don't allow holes in the register list, so any skip is fatal
2447 // for our transformation. GPR pops do, so we should just keep looking.
2448 if (IsVFPPushPop)
2449 return false;
2450 else
2451 continue;
2452 }
2453
2454 // Mark the unimportant registers as <def,dead> in the POP.
2455 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2456 true));
2457 --RegsNeeded;
2458 }
2459
2460 if (RegsNeeded > 0)
2461 return false;
2462
2463 // Finally we know we can profitably perform the optimisation so go
2464 // ahead: strip all existing registers off and add them back again
2465 // in the right order.
2466 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2467 MI->removeOperand(i);
2468
2469 // Add the complete list back in.
2470 MachineInstrBuilder MIB(MF, &*MI);
2471 for (const MachineOperand &MO : llvm::reverse(RegList))
2472 MIB.add(MO);
2473
2474 return true;
2475}
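// As an illustration: under minsize, a prologue 'push {r4, lr}' followed by
// 'sub sp, sp, #8' can instead push two extra scratch registers marked undef
// (e.g. r2 and r3), absorbing the 8-byte adjustment into the push; a matching
// pop can absorb 'add sp, sp, #8' by popping into registers marked dead.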
2476
2477bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2478 Register FrameReg, int &Offset,
2479 const ARMBaseInstrInfo &TII) {
2480 unsigned Opcode = MI.getOpcode();
2481 const MCInstrDesc &Desc = MI.getDesc();
2482 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2483 bool isSub = false;
2484
2485 // Memory operands in inline assembly always use AddrMode2.
2486 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2487 AddrMode = ARMII::AddrMode2;
2488
2489 if (Opcode == ARM::ADDri) {
2490 Offset += MI.getOperand(FrameRegIdx+1).getImm();
2491 if (Offset == 0) {
2492 // Turn it into a move.
2493 MI.setDesc(TII.get(ARM::MOVr));
2494 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2495 MI.removeOperand(FrameRegIdx+1);
2496 Offset = 0;
2497 return true;
2498 } else if (Offset < 0) {
2499 Offset = -Offset;
2500 isSub = true;
2501 MI.setDesc(TII.get(ARM::SUBri));
2502 }
2503
2504 // Common case: small offset, fits into instruction.
2505 if (ARM_AM::getSOImmVal(Offset) != -1) {
2506 // Replace the FrameIndex with sp / fp
2507 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2508 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2509 Offset = 0;
2510 return true;
2511 }
2512
2513 // Otherwise, pull as much of the immediate into this ADDri/SUBri
2514 // as possible.
2515 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2516 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
2517
2518 // We will handle these bits from offset, clear them.
2519 Offset &= ~ThisImmVal;
2520
2521 // Get the properly encoded SOImmVal field.
2522 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2523 "Bit extraction didn't work?");
2524 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2525 } else {
2526 unsigned ImmIdx = 0;
2527 int InstrOffs = 0;
2528 unsigned NumBits = 0;
2529 unsigned Scale = 1;
2530 switch (AddrMode) {
2531 case ARMII::AddrMode_i12:
2532 ImmIdx = FrameRegIdx + 1;
2533 InstrOffs = MI.getOperand(ImmIdx).getImm();
2534 NumBits = 12;
2535 break;
2536 case ARMII::AddrMode2:
2537 ImmIdx = FrameRegIdx+2;
2538 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2539 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2540 InstrOffs *= -1;
2541 NumBits = 12;
2542 break;
2543 case ARMII::AddrMode3:
2544 ImmIdx = FrameRegIdx+2;
2545 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2546 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2547 InstrOffs *= -1;
2548 NumBits = 8;
2549 break;
2550 case ARMII::AddrMode4:
2551 case ARMII::AddrMode6:
2552 // Can't fold any offset even if it's zero.
2553 return false;
2554 case ARMII::AddrMode5:
2555 ImmIdx = FrameRegIdx+1;
2556 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2557 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2558 InstrOffs *= -1;
2559 NumBits = 8;
2560 Scale = 4;
2561 break;
2562 case ARMII::AddrMode5FP16:
2563 ImmIdx = FrameRegIdx+1;
2564 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2565 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2566 InstrOffs *= -1;
2567 NumBits = 8;
2568 Scale = 2;
2569 break;
2570 case ARMII::AddrModeT2_i7:
2571 case ARMII::AddrModeT2_i7s2:
2572 case ARMII::AddrModeT2_i7s4:
2573 ImmIdx = FrameRegIdx+1;
2574 InstrOffs = MI.getOperand(ImmIdx).getImm();
2575 NumBits = 7;
2576 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2577 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2578 break;
2579 default:
2580 llvm_unreachable("Unsupported addressing mode!");
2581 }
2582
2583 Offset += InstrOffs * Scale;
2584 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2585 if (Offset < 0) {
2586 Offset = -Offset;
2587 isSub = true;
2588 }
2589
2590 // Attempt to fold the address computation if the opcode has offset bits.
2591 if (NumBits > 0) {
2592 // Common case: small offset, fits into instruction.
2593 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2594 int ImmedOffset = Offset / Scale;
2595 unsigned Mask = (1 << NumBits) - 1;
2596 if ((unsigned)Offset <= Mask * Scale) {
2597 // Replace the FrameIndex with sp
2598 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2599 // FIXME: When addrmode2 goes away, this will simplify (like the
2600 // T2 version), as the LDR.i12 versions don't need the encoding
2601 // tricks for the offset value.
2602 if (isSub) {
2603 if (AddrMode == ARMII::AddrMode_i12)
2604 ImmedOffset = -ImmedOffset;
2605 else
2606 ImmedOffset |= 1 << NumBits;
2607 }
2608 ImmOp.ChangeToImmediate(ImmedOffset);
2609 Offset = 0;
2610 return true;
2611 }
2612
2613 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2614 ImmedOffset = ImmedOffset & Mask;
2615 if (isSub) {
2616 if (AddrMode == ARMII::AddrMode_i12)
2617 ImmedOffset = -ImmedOffset;
2618 else
2619 ImmedOffset |= 1 << NumBits;
2620 }
2621 ImmOp.ChangeToImmediate(ImmedOffset);
2622 Offset &= ~(Mask*Scale);
2623 }
2624 }
2625
2626 Offset = (isSub) ? -Offset : Offset;
2627 return Offset == 0;
2628}
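// For example, an ADDri of a frame index whose object ends up at SP + 16 is
// rewritten in place to 'ADDri ..., $sp, 16' and the function returns true
// with Offset cleared; if only part of the offset is encodable, the encodable
// part is folded in and the remainder is handed back through Offset.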
2629
2630/// analyzeCompare - For a comparison instruction, return the source registers
2631/// in SrcReg and SrcReg2 if it has two register operands, and the value it
2632/// compares against in CmpValue. Return true if the comparison instruction
2633/// can be analyzed.
2634bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2635 Register &SrcReg2, int64_t &CmpMask,
2636 int64_t &CmpValue) const {
2637 switch (MI.getOpcode()) {
2638 default: break;
2639 case ARM::CMPri:
2640 case ARM::t2CMPri:
2641 case ARM::tCMPi8:
2642 SrcReg = MI.getOperand(0).getReg();
2643 SrcReg2 = 0;
2644 CmpMask = ~0;
2645 CmpValue = MI.getOperand(1).getImm();
2646 return true;
2647 case ARM::CMPrr:
2648 case ARM::t2CMPrr:
2649 case ARM::tCMPr:
2650 SrcReg = MI.getOperand(0).getReg();
2651 SrcReg2 = MI.getOperand(1).getReg();
2652 CmpMask = ~0;
2653 CmpValue = 0;
2654 return true;
2655 case ARM::TSTri:
2656 case ARM::t2TSTri:
2657 SrcReg = MI.getOperand(0).getReg();
2658 SrcReg2 = 0;
2659 CmpMask = MI.getOperand(1).getImm();
2660 CmpValue = 0;
2661 return true;
2662 }
2663
2664 return false;
2665}
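// For example, 'CMPri %0, 42' yields SrcReg = %0, SrcReg2 = 0, CmpMask = ~0
// and CmpValue = 42, while 'CMPrr %0, %1' yields both source registers with
// CmpValue = 0.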
2666
2667/// isSuitableForMask - Identify a suitable 'and' instruction that
2668/// operates on the given source register and applies the same mask
2669/// as a 'tst' instruction. Provide a limited look-through for copies.
2670/// When successful, MI will hold the found instruction.
2671static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2672 int CmpMask, bool CommonUse) {
2673 switch (MI->getOpcode()) {
2674 case ARM::ANDri:
2675 case ARM::t2ANDri:
2676 if (CmpMask != MI->getOperand(2).getImm())
2677 return false;
2678 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2679 return true;
2680 break;
2681 }
2682
2683 return false;
2684}
2685
2686/// getCmpToAddCondition - assuming the flags are set by CMP(a,b), return
2687/// the condition code to use instead if we modify the instructions so that
2688/// the flags are set by ADD(a,b,X).
2689inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2690 switch (CC) {
2691 default: return ARMCC::AL;
2692 case ARMCC::HS: return ARMCC::LO;
2693 case ARMCC::LO: return ARMCC::HS;
2694 case ARMCC::VS: return ARMCC::VS;
2695 case ARMCC::VC: return ARMCC::VC;
2696 }
2697}
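// The carry-related codes swap because, with x = ADD(a, b), the compare
// CMP(x, a) sets the carry exactly when the addition did not wrap, which is
// when the ADDS form leaves the carry clear; the overflow codes are kept
// because x - a and a + b overflow under the same conditions.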
2698
2699/// isRedundantFlagInstr - check whether the first instruction, whose only
2700/// purpose is to update flags, can be made redundant.
2701/// CMPrr can be made redundant by SUBrr if the operands are the same.
2702/// CMPri can be made redundant by SUBri if the operands are the same.
2703/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2704/// This function can be extended later on.
2705inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2706 Register SrcReg, Register SrcReg2,
2707 int64_t ImmValue,
2708 const MachineInstr *OI,
2709 bool &IsThumb1) {
2710 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2711 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2712 ((OI->getOperand(1).getReg() == SrcReg &&
2713 OI->getOperand(2).getReg() == SrcReg2) ||
2714 (OI->getOperand(1).getReg() == SrcReg2 &&
2715 OI->getOperand(2).getReg() == SrcReg))) {
2716 IsThumb1 = false;
2717 return true;
2718 }
2719
2720 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2721 ((OI->getOperand(2).getReg() == SrcReg &&
2722 OI->getOperand(3).getReg() == SrcReg2) ||
2723 (OI->getOperand(2).getReg() == SrcReg2 &&
2724 OI->getOperand(3).getReg() == SrcReg))) {
2725 IsThumb1 = true;
2726 return true;
2727 }
2728
2729 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2730 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2731 OI->getOperand(1).getReg() == SrcReg &&
2732 OI->getOperand(2).getImm() == ImmValue) {
2733 IsThumb1 = false;
2734 return true;
2735 }
2736
2737 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2738 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2739 OI->getOperand(2).getReg() == SrcReg &&
2740 OI->getOperand(3).getImm() == ImmValue) {
2741 IsThumb1 = true;
2742 return true;
2743 }
2744
2745 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2746 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2747 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2748 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2749 OI->getOperand(0).getReg() == SrcReg &&
2750 OI->getOperand(1).getReg() == SrcReg2) {
2751 IsThumb1 = false;
2752 return true;
2753 }
2754
2755 if (CmpI->getOpcode() == ARM::tCMPr &&
2756 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2757 OI->getOpcode() == ARM::tADDrr) &&
2758 OI->getOperand(0).getReg() == SrcReg &&
2759 OI->getOperand(2).getReg() == SrcReg2) {
2760 IsThumb1 = true;
2761 return true;
2762 }
2763
2764 return false;
2765}
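// For example, '%2 = SUBrr %0, %1' followed by 'CMPrr %0, %1' makes the
// compare redundant once the subtract is converted to its flag-setting form;
// the swapped compare 'CMPrr %1, %0' is also accepted, but the caller must
// then swap the condition codes of the flag users.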
2766
2767static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2768 switch (MI->getOpcode()) {
2769 default: return false;
2770 case ARM::tLSLri:
2771 case ARM::tLSRri:
2772 case ARM::tLSLrr:
2773 case ARM::tLSRrr:
2774 case ARM::tSUBrr:
2775 case ARM::tADDrr:
2776 case ARM::tADDi3:
2777 case ARM::tADDi8:
2778 case ARM::tSUBi3:
2779 case ARM::tSUBi8:
2780 case ARM::tMUL:
2781 case ARM::tADC:
2782 case ARM::tSBC:
2783 case ARM::tRSB:
2784 case ARM::tAND:
2785 case ARM::tORR:
2786 case ARM::tEOR:
2787 case ARM::tBIC:
2788 case ARM::tMVN:
2789 case ARM::tASRri:
2790 case ARM::tASRrr:
2791 case ARM::tROR:
2792 IsThumb1 = true;
2793 [[fallthrough]];
2794 case ARM::RSBrr:
2795 case ARM::RSBri:
2796 case ARM::RSCrr:
2797 case ARM::RSCri:
2798 case ARM::ADDrr:
2799 case ARM::ADDri:
2800 case ARM::ADCrr:
2801 case ARM::ADCri:
2802 case ARM::SUBrr:
2803 case ARM::SUBri:
2804 case ARM::SBCrr:
2805 case ARM::SBCri:
2806 case ARM::t2RSBri:
2807 case ARM::t2ADDrr:
2808 case ARM::t2ADDri:
2809 case ARM::t2ADCrr:
2810 case ARM::t2ADCri:
2811 case ARM::t2SUBrr:
2812 case ARM::t2SUBri:
2813 case ARM::t2SBCrr:
2814 case ARM::t2SBCri:
2815 case ARM::ANDrr:
2816 case ARM::ANDri:
2817 case ARM::ANDrsr:
2818 case ARM::ANDrsi:
2819 case ARM::t2ANDrr:
2820 case ARM::t2ANDri:
2821 case ARM::t2ANDrs:
2822 case ARM::ORRrr:
2823 case ARM::ORRri:
2824 case ARM::ORRrsr:
2825 case ARM::ORRrsi:
2826 case ARM::t2ORRrr:
2827 case ARM::t2ORRri:
2828 case ARM::t2ORRrs:
2829 case ARM::EORrr:
2830 case ARM::EORri:
2831 case ARM::EORrsr:
2832 case ARM::EORrsi:
2833 case ARM::t2EORrr:
2834 case ARM::t2EORri:
2835 case ARM::t2EORrs:
2836 case ARM::BICri:
2837 case ARM::BICrr:
2838 case ARM::BICrsi:
2839 case ARM::BICrsr:
2840 case ARM::t2BICri:
2841 case ARM::t2BICrr:
2842 case ARM::t2BICrs:
2843 case ARM::t2LSRri:
2844 case ARM::t2LSRrr:
2845 case ARM::t2LSLri:
2846 case ARM::t2LSLrr:
2847 case ARM::MOVsr:
2848 case ARM::MOVsi:
2849 return true;
2850 }
2851}
2852
2853/// optimizeCompareInstr - Convert the instruction supplying the argument to the
2854/// comparison into one that sets the zero bit in the flags register, and
2855/// remove a redundant compare instruction if an earlier instruction can set
2856/// the flags in the same way as the compare.
2857/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2858/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2859/// condition code of instructions which use the flags.
2860bool ARMBaseInstrInfo::optimizeCompareInstr(
2861 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
2862 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
2863 // Get the unique definition of SrcReg.
2864 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2865 if (!MI) return false;
2866
2867 // Masked compares sometimes use the same register as the corresponding 'and'.
2868 if (CmpMask != ~0) {
2869 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2870 MI = nullptr;
2871 for (MachineRegisterInfo::use_instr_iterator
2872 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2873 UI != UE; ++UI) {
2874 if (UI->getParent() != CmpInstr.getParent())
2875 continue;
2876 MachineInstr *PotentialAND = &*UI;
2877 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2878 isPredicated(*PotentialAND))
2879 continue;
2880 MI = PotentialAND;
2881 break;
2882 }
2883 if (!MI) return false;
2884 }
2885 }
2886
2887 // Get ready to iterate backward from CmpInstr.
2888 MachineBasicBlock::iterator I = CmpInstr, E = MI,
2889 B = CmpInstr.getParent()->begin();
2890
2891 // Early exit if CmpInstr is at the beginning of the BB.
2892 if (I == B) return false;
2893
2894 // There are two possible candidates which can be changed to set CPSR:
2895 // One is MI, the other is a SUB or ADD instruction.
2896 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2897 // ADDr[ri](r1, r2, X).
2898 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2899 MachineInstr *SubAdd = nullptr;
2900 if (SrcReg2 != 0)
2901 // MI is not a candidate for CMPrr.
2902 MI = nullptr;
2903 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2904 // Conservatively refuse to convert an instruction which isn't in the same
2905 // BB as the comparison.
2906 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2907 // Thus we cannot return here.
2908 if (CmpInstr.getOpcode() == ARM::CMPri ||
2909 CmpInstr.getOpcode() == ARM::t2CMPri ||
2910 CmpInstr.getOpcode() == ARM::tCMPi8)
2911 MI = nullptr;
2912 else
2913 return false;
2914 }
2915
2916 bool IsThumb1 = false;
2917 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2918 return false;
2919
2920 // We also want to do this peephole for cases like this: if (a*b == 0),
2921 // and optimise away the CMP instruction from the generated code sequence:
2922 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2923 // resulting from the select instruction, but these MOVS instructions for
2924 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2925 // However, if we only have MOVS instructions in between the CMP and the
2926 // other instruction (the MULS in this example), then the CPSR is dead so we
2927 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2928 // reordering and then continue the analysis hoping we can eliminate the
2929 // CMP. This peephole works on the vregs, so is still in SSA form. As a
2930 // consequence, the movs won't redefine/kill the MUL operands which would
2931 // make this reordering illegal.
2933 if (MI && IsThumb1) {
2934 --I;
2935 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
2936 bool CanReorder = true;
2937 for (; I != E; --I) {
2938 if (I->getOpcode() != ARM::tMOVi8) {
2939 CanReorder = false;
2940 break;
2941 }
2942 }
2943 if (CanReorder) {
2944 MI = MI->removeFromParent();
2945 E = CmpInstr;
2946 CmpInstr.getParent()->insert(E, MI);
2947 }
2948 }
2949 I = CmpInstr;
2950 E = MI;
2951 }
2952
2953 // Check that CPSR isn't set between the comparison instruction and the one we
2954 // want to change. At the same time, search for SubAdd.
2955 bool SubAddIsThumb1 = false;
2956 do {
2957 const MachineInstr &Instr = *--I;
2958
2959 // Check whether CmpInstr can be made redundant by the current instruction.
2960 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
2961 SubAddIsThumb1)) {
2962 SubAdd = &*I;
2963 break;
2964 }
2965
2966 // Allow E (which was initially MI) to be SubAdd but do not search before E.
2967 if (I == E)
2968 break;
2969
2970 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2971 Instr.readsRegister(ARM::CPSR, TRI))
2972 // This instruction modifies or uses CPSR after the one we want to
2973 // change. We can't do this transformation.
2974 return false;
2975
2976 if (I == B) {
2977 // In some cases, we scan the use-list of an instruction for an AND;
2978 // that AND is in the same BB, but may not be scheduled before the
2979 // corresponding TST. In that case, bail out.
2980 //
2981 // FIXME: We could try to reschedule the AND.
2982 return false;
2983 }
2984 } while (true);
2985
2986 // Return false if no candidates exist.
2987 if (!MI && !SubAdd)
2988 return false;
2989
2990 // If we found a SubAdd, use it, as it will be closer to the CMP.
2991 if (SubAdd) {
2992 MI = SubAdd;
2993 IsThumb1 = SubAddIsThumb1;
2994 }
2995
2996 // We can't use a predicated instruction - it doesn't always write the flags.
2997 if (isPredicated(*MI))
2998 return false;
2999
3000 // Scan forward for the use of CPSR
3001 // When checking against MI: if it's a conditional code that requires
3002 // checking of the V bit or C bit, then this is not safe to do.
3003 // It is safe to remove CmpInstr if CPSR is redefined or killed.
3004 // If we are done with the basic block, we need to check whether CPSR is
3005 // live-out.
3006 SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3007 OperandsToUpdate;
3008 bool isSafe = false;
3009 I = CmpInstr;
3010 E = CmpInstr.getParent()->end();
3011 while (!isSafe && ++I != E) {
3012 const MachineInstr &Instr = *I;
3013 for (unsigned IO = 0, EO = Instr.getNumOperands();
3014 !isSafe && IO != EO; ++IO) {
3015 const MachineOperand &MO = Instr.getOperand(IO);
3016 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3017 isSafe = true;
3018 break;
3019 }
3020 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3021 continue;
3022 if (MO.isDef()) {
3023 isSafe = true;
3024 break;
3025 }
3026 // The condition code is in the operand just before CPSR, except for VSELs.
3027 ARMCC::CondCodes CC = ARMCC::AL;
3028 bool IsInstrVSel = true;
3029 switch (Instr.getOpcode()) {
3030 default:
3031 IsInstrVSel = false;
3032 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3033 break;
3034 case ARM::VSELEQD:
3035 case ARM::VSELEQS:
3036 case ARM::VSELEQH:
3037 CC = ARMCC::EQ;
3038 break;
3039 case ARM::VSELGTD:
3040 case ARM::VSELGTS:
3041 case ARM::VSELGTH:
3042 CC = ARMCC::GT;
3043 break;
3044 case ARM::VSELGED:
3045 case ARM::VSELGES:
3046 case ARM::VSELGEH:
3047 CC = ARMCC::GE;
3048 break;
3049 case ARM::VSELVSD:
3050 case ARM::VSELVSS:
3051 case ARM::VSELVSH:
3052 CC = ARMCC::VS;
3053 break;
3054 }
3055
3056 if (SubAdd) {
3057 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3058 // on CMP needs to be updated to be based on SUB.
3059 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3060 // needs to be modified.
3061 // Push the condition code operands to OperandsToUpdate.
3062 // If it is safe to remove CmpInstr, the condition code of these
3063 // operands will be modified.
3064 unsigned Opc = SubAdd->getOpcode();
3065 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3066 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3067 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3068 Opc == ARM::tSUBi8;
3069 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3070 if (!IsSub ||
3071 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3072 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3073 // VSel doesn't support condition code update.
3074 if (IsInstrVSel)
3075 return false;
3076 // Ensure we can swap the condition.
3077 ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3078 if (NewCC == ARMCC::AL)
3079 return false;
3080 OperandsToUpdate.push_back(
3081 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3082 }
3083 } else {
3084 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3085 switch (CC) {
3086 case ARMCC::EQ: // Z
3087 case ARMCC::NE: // Z
3088 case ARMCC::MI: // N
3089 case ARMCC::PL: // N
3090 case ARMCC::AL: // none
3091 // CPSR can be used multiple times, we should continue.
3092 break;
3093 case ARMCC::HS: // C
3094 case ARMCC::LO: // C
3095 case ARMCC::VS: // V
3096 case ARMCC::VC: // V
3097 case ARMCC::HI: // C Z
3098 case ARMCC::LS: // C Z
3099 case ARMCC::GE: // N V
3100 case ARMCC::LT: // N V
3101 case ARMCC::GT: // Z N V
3102 case ARMCC::LE: // Z N V
3103 // The instruction uses the V bit or C bit which is not safe.
3104 return false;
3105 }
3106 }
3107 }
3108 }
3109
3110 // If CPSR is neither killed nor re-defined, we should check whether it is
3111 // live-out. If it is live-out, do not optimize.
3112 if (!isSafe) {
3113 MachineBasicBlock *MBB = CmpInstr.getParent();
3114 for (MachineBasicBlock *Succ : MBB->successors())
3115 if (Succ->isLiveIn(ARM::CPSR))
3116 return false;
3117 }
3118
3119 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3120 // set CPSR so this is represented as an explicit output)
3121 if (!IsThumb1) {
3122 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3123 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3124 MI->getOperand(CPSRRegNum).setIsDef(true);
3125 }
3126 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3127 CmpInstr.eraseFromParent();
3128
3129 // Modify the condition code of operands in OperandsToUpdate.
3130 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3131 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3132 for (auto &[MO, Cond] : OperandsToUpdate)
3133 MO->setImm(Cond);
3134
3135 MI->clearRegisterDeads(ARM::CPSR);
3136
3137 return true;
3138}
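// Roughly, this turns
//   sub r2, r0, r1
//   cmp r0, r1
//   beq .L1
// into
//   subs r2, r0, r1
//   beq .L1
// by enabling the optional CPSR def on the subtract and erasing the compare.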
3139
3140bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3141 // Do not sink MI if it might be used to optimize a redundant compare.
3142 // We heuristically only look at the instruction immediately following MI to
3143 // avoid potentially searching the entire basic block.
3144 if (isPredicated(MI))
3145 return true;
3146 MachineBasicBlock::const_iterator Next = &MI;
3147 ++Next;
3148 Register SrcReg, SrcReg2;
3149 int64_t CmpMask, CmpValue;
3150 bool IsThumb1;
3151 if (Next != MI.getParent()->end() &&
3152 analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3153 isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3154 return false;
3155 return true;
3156}
3157
3158bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3159 Register Reg,
3160 MachineRegisterInfo *MRI) const {
3161 // Fold large immediates into add, sub, or, xor.
3162 unsigned DefOpc = DefMI.getOpcode();
3163 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
3164 DefOpc != ARM::tMOVi32imm)
3165 return false;
3166 if (!DefMI.getOperand(1).isImm())
3167 // Could be t2MOVi32imm @xx
3168 return false;
3169
3170 if (!MRI->hasOneNonDBGUse(Reg))
3171 return false;
3172
3173 const MCInstrDesc &DefMCID = DefMI.getDesc();
3174 if (DefMCID.hasOptionalDef()) {
3175 unsigned NumOps = DefMCID.getNumOperands();
3176 const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3177 if (MO.getReg() == ARM::CPSR && !MO.isDead())
3178 // If DefMI defines CPSR and it is not dead, it's obviously not safe
3179 // to delete DefMI.
3180 return false;
3181 }
3182
3183 const MCInstrDesc &UseMCID = UseMI.getDesc();
3184 if (UseMCID.hasOptionalDef()) {
3185 unsigned NumOps = UseMCID.getNumOperands();
3186 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3187 // If the instruction sets the flag, do not attempt this optimization
3188 // since it may change the semantics of the code.
3189 return false;
3190 }
3191
3192 unsigned UseOpc = UseMI.getOpcode();
3193 unsigned NewUseOpc = 0;
3194 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3195 uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3196 bool Commute = false;
3197 switch (UseOpc) {
3198 default: return false;
3199 case ARM::SUBrr:
3200 case ARM::ADDrr:
3201 case ARM::ORRrr:
3202 case ARM::EORrr:
3203 case ARM::t2SUBrr:
3204 case ARM::t2ADDrr:
3205 case ARM::t2ORRrr:
3206 case ARM::t2EORrr: {
3207 Commute = UseMI.getOperand(2).getReg() != Reg;
3208 switch (UseOpc) {
3209 default: break;
3210 case ARM::ADDrr:
3211 case ARM::SUBrr:
3212 if (UseOpc == ARM::SUBrr && Commute)
3213 return false;
3214
3215 // ADD/SUB are special because they're essentially the same operation, so
3216 // we can handle a larger range of immediates.
3217 if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3218 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3219 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3220 ImmVal = -ImmVal;
3221 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3222 } else
3223 return false;
3224 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3225 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3226 break;
3227 case ARM::ORRrr:
3228 case ARM::EORrr:
3229 if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3230 return false;
3231 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3232 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3233 switch (UseOpc) {
3234 default: break;
3235 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3236 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3237 }
3238 break;
3239 case ARM::t2ADDrr:
3240 case ARM::t2SUBrr: {
3241 if (UseOpc == ARM::t2SUBrr && Commute)
3242 return false;
3243
3244 // ADD/SUB are special because they're essentially the same operation, so
3245 // we can handle a larger range of immediates.
3246 const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3247 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3248 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3249 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3250 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3251 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3252 ImmVal = -ImmVal;
3253 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3254 } else
3255 return false;
3256 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3257 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3258 break;
3259 }
3260 case ARM::t2ORRrr:
3261 case ARM::t2EORrr:
3262 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3263 return false;
3264 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3265 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3266 switch (UseOpc) {
3267 default: break;
3268 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3269 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3270 }
3271 break;
3272 }
3273 }
3274 }
3275
3276 unsigned OpIdx = Commute ? 2 : 1;
3277 Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3278 bool isKill = UseMI.getOperand(OpIdx).isKill();
3279 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3280 Register NewReg = MRI->createVirtualRegister(TRC);
3281 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3282 NewReg)
3283 .addReg(Reg1, getKillRegState(isKill))
3284 .addImm(SOImmValV1)
3285 .add(predOps(ARMCC::AL))
3286 .add(condCodeOp());
3287 UseMI.setDesc(get(NewUseOpc));
3288 UseMI.getOperand(1).setReg(NewReg);
3289 UseMI.getOperand(1).setIsKill();
3290 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3291 DefMI.eraseFromParent();
3292 // FIXME: t2ADDrr should be split, as different rules apply when writing to SP,
3293 // just as t2ADDri was split into [t2ADDri, t2ADDspImm].
3294 // Then the code below will not be needed, as the input/output register
3295 // classes will be rgpr or gprSP.
3296 // For now, we fix the UseMI operand explicitly here:
3297 switch(NewUseOpc){
3298 case ARM::t2ADDspImm:
3299 case ARM::t2SUBspImm:
3300 case ARM::t2ADDri:
3301 case ARM::t2SUBri:
3302 MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3303 }
3304 return true;
3305}
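// For example, if '%1 = MOVi32imm 0xFF00FF' has the single use
// '%2 = ADDrr %0, %1', the constant is a valid two-part modified immediate
// (0xFF plus 0xFF0000), so this emits one ADDri into a fresh register,
// rewrites the ADDrr into a second ADDri, and erases the MOVi32imm.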
3306
3307static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3308 const MachineInstr &MI) {
3309 switch (MI.getOpcode()) {
3310 default: {
3311 const MCInstrDesc &Desc = MI.getDesc();
3312 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3313 assert(UOps >= 0 && "bad # UOps");
3314 return UOps;
3315 }
3316
3317 case ARM::LDRrs:
3318 case ARM::LDRBrs:
3319 case ARM::STRrs:
3320 case ARM::STRBrs: {
3321 unsigned ShOpVal = MI.getOperand(3).getImm();
3322 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3323 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3324 if (!isSub &&
3325 (ShImm == 0 ||
3326 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3327 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3328 return 1;
3329 return 2;
3330 }
3331
3332 case ARM::LDRH:
3333 case ARM::STRH: {
3334 if (!MI.getOperand(2).getReg())
3335 return 1;
3336
3337 unsigned ShOpVal = MI.getOperand(3).getImm();
3338 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3339 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3340 if (!isSub &&
3341 (ShImm == 0 ||
3342 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3343 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3344 return 1;
3345 return 2;
3346 }
3347
3348 case ARM::LDRSB:
3349 case ARM::LDRSH:
3350 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3351
3352 case ARM::LDRSB_POST:
3353 case ARM::LDRSH_POST: {
3354 Register Rt = MI.getOperand(0).getReg();
3355 Register Rm = MI.getOperand(3).getReg();
3356 return (Rt == Rm) ? 4 : 3;
3357 }
3358
3359 case ARM::LDR_PRE_REG:
3360 case ARM::LDRB_PRE_REG: {
3361 Register Rt = MI.getOperand(0).getReg();
3362 Register Rm = MI.getOperand(3).getReg();
3363 if (Rt == Rm)
3364 return 3;
3365 unsigned ShOpVal = MI.getOperand(4).getImm();
3366 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3367 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3368 if (!isSub &&
3369 (ShImm == 0 ||
3370 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3371 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3372 return 2;
3373 return 3;
3374 }
3375
3376 case ARM::STR_PRE_REG:
3377 case ARM::STRB_PRE_REG: {
3378 unsigned ShOpVal = MI.getOperand(4).getImm();
3379 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3380 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3381 if (!isSub &&
3382 (ShImm == 0 ||
3383 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3384 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3385 return 2;
3386 return 3;
3387 }
3388
3389 case ARM::LDRH_PRE:
3390 case ARM::STRH_PRE: {
3391 Register Rt = MI.getOperand(0).getReg();
3392 Register Rm = MI.getOperand(3).getReg();
3393 if (!Rm)
3394 return 2;
3395 if (Rt == Rm)
3396 return 3;
3397 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3398 }
3399
3400 case ARM::LDR_POST_REG:
3401 case ARM::LDRB_POST_REG:
3402 case ARM::LDRH_POST: {
3403 Register Rt = MI.getOperand(0).getReg();
3404 Register Rm = MI.getOperand(3).getReg();
3405 return (Rt == Rm) ? 3 : 2;
3406 }
3407
3408 case ARM::LDR_PRE_IMM:
3409 case ARM::LDRB_PRE_IMM:
3410 case ARM::LDR_POST_IMM:
3411 case ARM::LDRB_POST_IMM:
3412 case ARM::STRB_POST_IMM:
3413 case ARM::STRB_POST_REG:
3414 case ARM::STRB_PRE_IMM:
3415 case ARM::STRH_POST:
3416 case ARM::STR_POST_IMM:
3417 case ARM::STR_POST_REG:
3418 case ARM::STR_PRE_IMM:
3419 return 2;
3420
3421 case ARM::LDRSB_PRE:
3422 case ARM::LDRSH_PRE: {
3423 Register Rm = MI.getOperand(3).getReg();
3424 if (Rm == 0)
3425 return 3;
3426 Register Rt = MI.getOperand(0).getReg();
3427 if (Rt == Rm)
3428 return 4;
3429 unsigned ShOpVal = MI.getOperand(4).getImm();
3430 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3431 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3432 if (!isSub &&
3433 (ShImm == 0 ||
3434 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3435 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3436 return 3;
3437 return 4;
3438 }
3439
3440 case ARM::LDRD: {
3441 Register Rt = MI.getOperand(0).getReg();
3442 Register Rn = MI.getOperand(2).getReg();
3443 Register Rm = MI.getOperand(3).getReg();
3444 if (Rm)
3445 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3446 : 3;
3447 return (Rt == Rn) ? 3 : 2;
3448 }
3449
3450 case ARM::STRD: {
3451 Register Rm = MI.getOperand(3).getReg();
3452 if (Rm)
3453 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3454 : 3;
3455 return 2;
3456 }
3457
3458 case ARM::LDRD_POST:
3459 case ARM::t2LDRD_POST:
3460 return 3;
3461
3462 case ARM::STRD_POST:
3463 case ARM::t2STRD_POST:
3464 return 4;
3465
3466 case ARM::LDRD_PRE: {
3467 Register Rt = MI.getOperand(0).getReg();
3468 Register Rn = MI.getOperand(3).getReg();
3469 Register Rm = MI.getOperand(4).getReg();
3470 if (Rm)
3471 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3472 : 4;
3473 return (Rt == Rn) ? 4 : 3;
3474 }
3475
3476 case ARM::t2LDRD_PRE: {
3477 Register Rt = MI.getOperand(0).getReg();
3478 Register Rn = MI.getOperand(3).getReg();
3479 return (Rt == Rn) ? 4 : 3;
3480 }
3481
3482 case ARM::STRD_PRE: {
3483 Register Rm = MI.getOperand(4).getReg();
3484 if (Rm)
3485 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3486 : 4;
3487 return 3;
3488 }
3489
3490 case ARM::t2STRD_PRE:
3491 return 3;
3492
3493 case ARM::t2LDR_POST:
3494 case ARM::t2LDRB_POST:
3495 case ARM::t2LDRB_PRE:
3496 case ARM::t2LDRSBi12:
3497 case ARM::t2LDRSBi8:
3498 case ARM::t2LDRSBpci:
3499 case ARM::t2LDRSBs:
3500 case ARM::t2LDRH_POST:
3501 case ARM::t2LDRH_PRE:
3502 case ARM::t2LDRSBT:
3503 case ARM::t2LDRSB_POST:
3504 case ARM::t2LDRSB_PRE:
3505 case ARM::t2LDRSH_POST:
3506 case ARM::t2LDRSH_PRE:
3507 case ARM::t2LDRSHi12:
3508 case ARM::t2LDRSHi8:
3509 case ARM::t2LDRSHpci:
3510 case ARM::t2LDRSHs:
3511 return 2;
3512
3513 case ARM::t2LDRDi8: {
3514 Register Rt = MI.getOperand(0).getReg();
3515 Register Rn = MI.getOperand(2).getReg();
3516 return (Rt == Rn) ? 3 : 2;
3517 }
3518
3519 case ARM::t2STRB_POST:
3520 case ARM::t2STRB_PRE:
3521 case ARM::t2STRBs:
3522 case ARM::t2STRDi8:
3523 case ARM::t2STRH_POST:
3524 case ARM::t2STRH_PRE:
3525 case ARM::t2STRHs:
3526 case ARM::t2STR_POST:
3527 case ARM::t2STR_PRE:
3528 case ARM::t2STRs:
3529 return 2;
3530 }
3531}
3532
3533// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3534// can't be easily determined, return 0 (missing MachineMemOperand).
3535//
3536// FIXME: The current MachineInstr design does not support relying on machine
3537// mem operands to determine the width of a memory access. Instead, we expect
3538// the target to provide this information based on the instruction opcode and
3539// operands. However, using MachineMemOperand is the best solution now for
3540// two reasons:
3541//
3542// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3543// operands. This is much more dangerous than using the MachineMemOperand
3544// sizes because CodeGen passes can insert/remove optional machine operands. In
3545// fact, it's totally incorrect for preRA passes and appears to be wrong for
3546// postRA passes as well.
3547//
3548// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3549// machine model that calls this should handle the unknown (zero size) case.
3550//
3551// Long term, we should require a target hook that verifies MachineMemOperand
3552// sizes during MC lowering. That target hook should be local to MC lowering
3553// because we can't ensure that it is aware of other MI forms. Doing this will
3554// ensure that MachineMemOperands are correctly propagated through all passes.
3555unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3556 unsigned Size = 0;
3557 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3558 E = MI.memoperands_end();
3559 I != E; ++I) {
3560 Size += (*I)->getSize().getValue();
3561 }
3562 // FIXME: The scheduler currently can't handle values larger than 16. But
3563 // the values can actually go up to 32 for floating-point load/store
3564 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3565 // operations isn't right; we could end up with "extra" memory operands for
3566 // various reasons, like tail merge merging two memory operations.
3567 return std::min(Size / 4, 16U);
3568}
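// Editorial note, not part of the original source: a worked example of the
// count above, assuming each MachineMemOperand describes one 32-bit access.
// An LDMIA carrying four 4-byte memory operands gives Size = 16, so the
// function returns 16 / 4 = 4 addresses; a VLDM touching 80 bytes would
// compute 20 but is clamped to the scheduler's current limit of 16.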
3569
3570static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3571 unsigned NumRegs) {
3572 unsigned UOps = 1 + NumRegs; // 1 for address computation.
3573 switch (Opc) {
3574 default:
3575 break;
3576 case ARM::VLDMDIA_UPD:
3577 case ARM::VLDMDDB_UPD:
3578 case ARM::VLDMSIA_UPD:
3579 case ARM::VLDMSDB_UPD:
3580 case ARM::VSTMDIA_UPD:
3581 case ARM::VSTMDDB_UPD:
3582 case ARM::VSTMSIA_UPD:
3583 case ARM::VSTMSDB_UPD:
3584 case ARM::LDMIA_UPD:
3585 case ARM::LDMDA_UPD:
3586 case ARM::LDMDB_UPD:
3587 case ARM::LDMIB_UPD:
3588 case ARM::STMIA_UPD:
3589 case ARM::STMDA_UPD:
3590 case ARM::STMDB_UPD:
3591 case ARM::STMIB_UPD:
3592 case ARM::tLDMIA_UPD:
3593 case ARM::tSTMIA_UPD:
3594 case ARM::t2LDMIA_UPD:
3595 case ARM::t2LDMDB_UPD:
3596 case ARM::t2STMIA_UPD:
3597 case ARM::t2STMDB_UPD:
3598 ++UOps; // One for base register writeback.
3599 break;
3600 case ARM::LDMIA_RET:
3601 case ARM::tPOP_RET:
3602 case ARM::t2LDMIA_RET:
3603 UOps += 2; // One for base reg wb, one for write to pc.
3604 break;
3605 }
3606 return UOps;
3607}
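// Editorial example, not part of the original source: for a 3-register
// LDMIA_UPD the helper above returns 1 (address generation) + 3 (registers)
// + 1 (base writeback) = 5 uops, and a 4-register LDMIA_RET returns
// 1 + 4 + 2 = 7 (writeback plus the write to pc).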
3608
3609unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3610 const MachineInstr &MI) const {
3611 if (!ItinData || ItinData->isEmpty())
3612 return 1;
3613
3614 const MCInstrDesc &Desc = MI.getDesc();
3615 unsigned Class = Desc.getSchedClass();
3616 int ItinUOps = ItinData->getNumMicroOps(Class);
3617 if (ItinUOps >= 0) {
3618 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3619 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3620
3621 return ItinUOps;
3622 }
3623
3624 unsigned Opc = MI.getOpcode();
3625 switch (Opc) {
3626 default:
3627 llvm_unreachable("Unexpected multi-uops instruction!");
3628 case ARM::VLDMQIA:
3629 case ARM::VSTMQIA:
3630 return 2;
3631
3632 // The number of uOps for load / store multiple is determined by the number
3633 // of registers.
3634 //
3635 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3636 // same cycle. The scheduling for the first load / store must be done
3637 // separately by assuming the address is not 64-bit aligned.
3638 //
3639 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3640 // is not 64-bit aligned, the AGU takes an extra cycle. For VFP / NEON
3641 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
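// Editorial example, not part of the original source: on a Cortex-A9-like
// core, a VLDMDIA of 5 D-registers costs (5 / 2) + (5 % 2) + 1 = 4 uops,
// which is what the VLDM/VSTM case below derives from the variable operand
// count.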
3642 case ARM::VLDMDIA:
3643 case ARM::VLDMDIA_UPD:
3644 case ARM::VLDMDDB_UPD:
3645 case ARM::VLDMSIA:
3646 case ARM::VLDMSIA_UPD:
3647 case ARM::VLDMSDB_UPD:
3648 case ARM::VSTMDIA:
3649 case ARM::VSTMDIA_UPD:
3650 case ARM::VSTMDDB_UPD:
3651 case ARM::VSTMSIA:
3652 case ARM::VSTMSIA_UPD:
3653 case ARM::VSTMSDB_UPD: {
3654 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3655 return (NumRegs / 2) + (NumRegs % 2) + 1;
3656 }
3657
3658 case ARM::LDMIA_RET:
3659 case ARM::LDMIA:
3660 case ARM::LDMDA:
3661 case ARM::LDMDB:
3662 case ARM::LDMIB:
3663 case ARM::LDMIA_UPD:
3664 case ARM::LDMDA_UPD:
3665 case ARM::LDMDB_UPD:
3666 case ARM::LDMIB_UPD:
3667 case ARM::STMIA:
3668 case ARM::STMDA:
3669 case ARM::STMDB:
3670 case ARM::STMIB:
3671 case ARM::STMIA_UPD:
3672 case ARM::STMDA_UPD:
3673 case ARM::STMDB_UPD:
3674 case ARM::STMIB_UPD:
3675 case ARM::tLDMIA:
3676 case ARM::tLDMIA_UPD:
3677 case ARM::tSTMIA_UPD:
3678 case ARM::tPOP_RET:
3679 case ARM::tPOP:
3680 case ARM::tPUSH:
3681 case ARM::t2LDMIA_RET:
3682 case ARM::t2LDMIA:
3683 case ARM::t2LDMDB:
3684 case ARM::t2LDMIA_UPD:
3685 case ARM::t2LDMDB_UPD:
3686 case ARM::t2STMIA:
3687 case ARM::t2STMDB:
3688 case ARM::t2STMIA_UPD:
3689 case ARM::t2STMDB_UPD: {
3690 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3691 switch (Subtarget.getLdStMultipleTiming()) {
3692 case ARMSubtarget::SingleIssuePlusExtras:
3693 return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3694 case ARMSubtarget::SingleIssue:
3695 // Assume the worst.
3696 return NumRegs;
3697 case ARMSubtarget::DoubleIssue: {
3698 if (NumRegs < 4)
3699 return 2;
3700 // 4 registers would be issued: 2, 2.
3701 // 5 registers would be issued: 2, 2, 1.
3702 unsigned UOps = (NumRegs / 2);
3703 if (NumRegs % 2)
3704 ++UOps;
3705 return UOps;
3706 }
3707 case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3708 unsigned UOps = (NumRegs / 2);
3709 // If there is an odd number of registers or the access is not 64-bit aligned,
3710 // then it takes an extra AGU (Address Generation Unit) cycle.
3711 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3712 (*MI.memoperands_begin())->getAlign() < Align(8))
3713 ++UOps;
3714 return UOps;
3715 }
3716 }
3717 }
3718 }
3719 llvm_unreachable("Didn't find the number of microops");
3720}
3721
3722std::optional<unsigned>
3723ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3724 const MCInstrDesc &DefMCID, unsigned DefClass,
3725 unsigned DefIdx, unsigned DefAlign) const {
3726 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3727 if (RegNo <= 0)
3728 // Def is the address writeback.
3729 return ItinData->getOperandCycle(DefClass, DefIdx);
3730
3731 unsigned DefCycle;
3732 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3733 // (regno / 2) + (regno % 2) + 1
3734 DefCycle = RegNo / 2 + 1;
3735 if (RegNo % 2)
3736 ++DefCycle;
3737 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3738 DefCycle = RegNo;
3739 bool isSLoad = false;
3740
3741 switch (DefMCID.getOpcode()) {
3742 default: break;
3743 case ARM::VLDMSIA:
3744 case ARM::VLDMSIA_UPD:
3745 case ARM::VLDMSDB_UPD:
3746 isSLoad = true;
3747 break;
3748 }
3749
3750 // If there is an odd number of 'S' registers or the access is not 64-bit aligned,
3751 // then it takes an extra cycle.
3752 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3753 ++DefCycle;
3754 } else {
3755 // Assume the worst.
3756 DefCycle = RegNo + 2;
3757 }
3758
3759 return DefCycle;
3760}
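// Editorial example, not part of the original source: for a VLDMSIA whose
// defined operand is the 5th loaded S-register (RegNo == 5), Cortex-A8 gets
// DefCycle = 5 / 2 + 1 = 3, plus 1 for the odd register number, i.e. 4; an
// A9-like core with a 64-bit aligned address gets DefCycle = 5 + 1 = 6
// because of the odd number of 'S' registers.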
3761
3762std::optional<unsigned>
3763ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3764 const MCInstrDesc &DefMCID, unsigned DefClass,
3765 unsigned DefIdx, unsigned DefAlign) const {
3766 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3767 if (RegNo <= 0)
3768 // Def is the address writeback.
3769 return ItinData->getOperandCycle(DefClass, DefIdx);
3770
3771 unsigned DefCycle;
3772 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3773 // 4 registers would be issued: 1, 2, 1.
3774 // 5 registers would be issued: 1, 2, 2.
3775 DefCycle = RegNo / 2;
3776 if (DefCycle < 1)
3777 DefCycle = 1;
3778 // Result latency is issue cycle + 2: E2.
3779 DefCycle += 2;
3780 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3781 DefCycle = (RegNo / 2);
3782 // If there is an odd number of registers or the access is not 64-bit aligned,
3783 // then it takes an extra AGU (Address Generation Unit) cycle.
3784 if ((RegNo % 2) || DefAlign < 8)
3785 ++DefCycle;
3786 // Result latency is AGU cycles + 2.
3787 DefCycle += 2;
3788 } else {
3789 // Assume the worst.
3790 DefCycle = RegNo + 2;
3791 }
3792
3793 return DefCycle;
3794}
3795
3796std::optional<unsigned>
3797ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3798 const MCInstrDesc &UseMCID, unsigned UseClass,
3799 unsigned UseIdx, unsigned UseAlign) const {
3800 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3801 if (RegNo <= 0)
3802 return ItinData->getOperandCycle(UseClass, UseIdx);
3803
3804 unsigned UseCycle;
3805 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3806 // (regno / 2) + (regno % 2) + 1
3807 UseCycle = RegNo / 2 + 1;
3808 if (RegNo % 2)
3809 ++UseCycle;
3810 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3811 UseCycle = RegNo;
3812 bool isSStore = false;
3813
3814 switch (UseMCID.getOpcode()) {
3815 default: break;
3816 case ARM::VSTMSIA:
3817 case ARM::VSTMSIA_UPD:
3818 case ARM::VSTMSDB_UPD:
3819 isSStore = true;
3820 break;
3821 }
3822
3823 // If there is an odd number of 'S' registers or the access is not 64-bit aligned,
3824 // then it takes an extra cycle.
3825 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3826 ++UseCycle;
3827 } else {
3828 // Assume the worst.
3829 UseCycle = RegNo + 2;
3830 }
3831
3832 return UseCycle;
3833}
3834
3835std::optional<unsigned>
3836ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3837 const MCInstrDesc &UseMCID, unsigned UseClass,
3838 unsigned UseIdx, unsigned UseAlign) const {
3839 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3840 if (RegNo <= 0)
3841 return ItinData->getOperandCycle(UseClass, UseIdx);
3842
3843 unsigned UseCycle;
3844 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3845 UseCycle = RegNo / 2;
3846 if (UseCycle < 2)
3847 UseCycle = 2;
3848 // Read in E3.
3849 UseCycle += 2;
3850 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3851 UseCycle = (RegNo / 2);
3852 // If there is an odd number of registers or the access is not 64-bit aligned,
3853 // then it takes an extra AGU (Address Generation Unit) cycle.
3854 if ((RegNo % 2) || UseAlign < 8)
3855 ++UseCycle;
3856 } else {
3857 // Assume the worst.
3858 UseCycle = 1;
3859 }
3860 return UseCycle;
3861}
3862
3863std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
3864 const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
3865 unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
3866 unsigned UseIdx, unsigned UseAlign) const {
3867 unsigned DefClass = DefMCID.getSchedClass();
3868 unsigned UseClass = UseMCID.getSchedClass();
3869
3870 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3871 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3872
3873 // This may be a def / use of a variable_ops instruction; the operand
3874 // latency might be determinable dynamically. Let the target try to
3875 // figure it out.
3876 std::optional<unsigned> DefCycle;
3877 bool LdmBypass = false;
3878 switch (DefMCID.getOpcode()) {
3879 default:
3880 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3881 break;
3882
3883 case ARM::VLDMDIA:
3884 case ARM::VLDMDIA_UPD:
3885 case ARM::VLDMDDB_UPD:
3886 case ARM::VLDMSIA:
3887 case ARM::VLDMSIA_UPD:
3888 case ARM::VLDMSDB_UPD:
3889 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3890 break;
3891
3892 case ARM::LDMIA_RET:
3893 case ARM::LDMIA:
3894 case ARM::LDMDA:
3895 case ARM::LDMDB:
3896 case ARM::LDMIB:
3897 case ARM::LDMIA_UPD:
3898 case ARM::LDMDA_UPD:
3899 case ARM::LDMDB_UPD:
3900 case ARM::LDMIB_UPD:
3901 case ARM::tLDMIA:
3902 case ARM::tLDMIA_UPD:
3903 case ARM::tPUSH:
3904 case ARM::t2LDMIA_RET:
3905 case ARM::t2LDMIA:
3906 case ARM::t2LDMDB:
3907 case ARM::t2LDMIA_UPD:
3908 case ARM::t2LDMDB_UPD:
3909 LdmBypass = true;
3910 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3911 break;
3912 }
3913
3914 if (!DefCycle)
3915 // We can't seem to determine the result latency of the def, assume it's 2.
3916 DefCycle = 2;
3917
3918 std::optional<unsigned> UseCycle;
3919 switch (UseMCID.getOpcode()) {
3920 default:
3921 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3922 break;
3923
3924 case ARM::VSTMDIA:
3925 case ARM::VSTMDIA_UPD:
3926 case ARM::VSTMDDB_UPD:
3927 case ARM::VSTMSIA:
3928 case ARM::VSTMSIA_UPD:
3929 case ARM::VSTMSDB_UPD:
3930 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3931 break;
3932
3933 case ARM::STMIA:
3934 case ARM::STMDA:
3935 case ARM::STMDB:
3936 case ARM::STMIB:
3937 case ARM::STMIA_UPD:
3938 case ARM::STMDA_UPD:
3939 case ARM::STMDB_UPD:
3940 case ARM::STMIB_UPD:
3941 case ARM::tSTMIA_UPD:
3942 case ARM::tPOP_RET:
3943 case ARM::tPOP:
3944 case ARM::t2STMIA:
3945 case ARM::t2STMDB:
3946 case ARM::t2STMIA_UPD:
3947 case ARM::t2STMDB_UPD:
3948 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3949 break;
3950 }
3951
3952 if (!UseCycle)
3953 // Assume it's read in the first stage.
3954 UseCycle = 1;
3955
3956 if (UseCycle > *DefCycle + 1)
3957 return std::nullopt;
3958
3959 UseCycle = *DefCycle - *UseCycle + 1;
3960 if (UseCycle > 0u) {
3961 if (LdmBypass) {
3962 // It's a variable_ops instruction so we can't use DefIdx here. Just use
3963 // first def operand.
3964 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3965 UseClass, UseIdx))
3966 UseCycle = *UseCycle - 1;
3967 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3968 UseClass, UseIdx)) {
3969 UseCycle = *UseCycle - 1;
3970 }
3971 }
3972
3973 return UseCycle;
3974}
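// Editorial example, not part of the original source: if the def operand is
// available in cycle 4 (DefCycle) and the use reads in cycle 2 (UseCycle),
// the code above reports a latency of 4 - 2 + 1 = 3, and one cycle less when
// the itinerary records pipeline forwarding between the two operands.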
3975
3976static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3977 const MachineInstr *MI, unsigned Reg,
3978 unsigned &DefIdx, unsigned &Dist) {
3979 Dist = 0;
3980
3981 MachineBasicBlock::const_iterator I = MI;
3982 MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
3983 assert(II->isInsideBundle() && "Empty bundle?");
3984
3985 int Idx = -1;
3986 while (II->isInsideBundle()) {
3987 Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
3988 if (Idx != -1)
3989 break;
3990 --II;
3991 ++Dist;
3992 }
3993
3994 assert(Idx != -1 && "Cannot find bundled definition!");
3995 DefIdx = Idx;
3996 return &*II;
3997}
3998
3999static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4000 const MachineInstr &MI, unsigned Reg,
4001 unsigned &UseIdx, unsigned &Dist) {
4002 Dist = 0;
4003
4004 MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
4005 assert(II->isInsideBundle() && "Empty bundle?");
4006 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4007
4008 // FIXME: This doesn't properly handle multiple uses.
4009 int Idx = -1;
4010 while (II != E && II->isInsideBundle()) {
4011 Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
4012 if (Idx != -1)
4013 break;
4014 if (II->getOpcode() != ARM::t2IT)
4015 ++Dist;
4016 ++II;
4017 }
4018
4019 if (Idx == -1) {
4020 Dist = 0;
4021 return nullptr;
4022 }
4023
4024 UseIdx = Idx;
4025 return &*II;
4026}
4027
4028/// Return the number of cycles to add to (or subtract from) the static
4029/// itinerary based on the def opcode and alignment. The caller will ensure that
4030/// adjusted latency is at least one cycle.
4031static int adjustDefLatency(const ARMSubtarget &Subtarget,
4032 const MachineInstr &DefMI,
4033 const MCInstrDesc &DefMCID, unsigned DefAlign) {
4034 int Adjust = 0;
4035 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4036 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4037 // variants are one cycle cheaper.
4038 switch (DefMCID.getOpcode()) {
4039 default: break;
4040 case ARM::LDRrs:
4041 case ARM::LDRBrs: {
4042 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4043 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4044 if (ShImm == 0 ||
4045 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4046 --Adjust;
4047 break;
4048 }
4049 case ARM::t2LDRs:
4050 case ARM::t2LDRBs:
4051 case ARM::t2LDRHs:
4052 case ARM::t2LDRSHs: {
4053 // Thumb2 mode: lsl only.
4054 unsigned ShAmt = DefMI.getOperand(3).getImm();
4055 if (ShAmt == 0 || ShAmt == 2)
4056 --Adjust;
4057 break;
4058 }
4059 }
4060 } else if (Subtarget.isSwift()) {
4061 // FIXME: Properly handle all of the latency adjustments for address
4062 // writeback.
4063 switch (DefMCID.getOpcode()) {
4064 default: break;
4065 case ARM::LDRrs:
4066 case ARM::LDRBrs: {
4067 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4068 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4069 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4070 if (!isSub &&
4071 (ShImm == 0 ||
4072 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4073 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4074 Adjust -= 2;
4075 else if (!isSub &&
4076 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4077 --Adjust;
4078 break;
4079 }
4080 case ARM::t2LDRs:
4081 case ARM::t2LDRBs:
4082 case ARM::t2LDRHs:
4083 case ARM::t2LDRSHs: {
4084 // Thumb2 mode: lsl only.
4085 unsigned ShAmt = DefMI.getOperand(3).getImm();
4086 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4087 Adjust -= 2;
4088 break;
4089 }
4090 }
4091 }
4092
4093 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4094 switch (DefMCID.getOpcode()) {
4095 default: break;
4096 case ARM::VLD1q8:
4097 case ARM::VLD1q16:
4098 case ARM::VLD1q32:
4099 case ARM::VLD1q64:
4100 case ARM::VLD1q8wb_fixed:
4101 case ARM::VLD1q16wb_fixed:
4102 case ARM::VLD1q32wb_fixed:
4103 case ARM::VLD1q64wb_fixed:
4104 case ARM::VLD1q8wb_register:
4105 case ARM::VLD1q16wb_register:
4106 case ARM::VLD1q32wb_register:
4107 case ARM::VLD1q64wb_register:
4108 case ARM::VLD2d8:
4109 case ARM::VLD2d16:
4110 case ARM::VLD2d32:
4111 case ARM::VLD2q8:
4112 case ARM::VLD2q16:
4113 case ARM::VLD2q32:
4114 case ARM::VLD2d8wb_fixed:
4115 case ARM::VLD2d16wb_fixed:
4116 case ARM::VLD2d32wb_fixed:
4117 case ARM::VLD2q8wb_fixed:
4118 case ARM::VLD2q16wb_fixed:
4119 case ARM::VLD2q32wb_fixed:
4120 case ARM::VLD2d8wb_register:
4121 case ARM::VLD2d16wb_register:
4122 case ARM::VLD2d32wb_register:
4123 case ARM::VLD2q8wb_register:
4124 case ARM::VLD2q16wb_register:
4125 case ARM::VLD2q32wb_register:
4126 case ARM::VLD3d8:
4127 case ARM::VLD3d16:
4128 case ARM::VLD3d32:
4129 case ARM::VLD1d64T:
4130 case ARM::VLD3d8_UPD:
4131 case ARM::VLD3d16_UPD:
4132 case ARM::VLD3d32_UPD:
4133 case ARM::VLD1d64Twb_fixed:
4134 case ARM::VLD1d64Twb_register:
4135 case ARM::VLD3q8_UPD:
4136 case ARM::VLD3q16_UPD:
4137 case ARM::VLD3q32_UPD:
4138 case ARM::VLD4d8:
4139 case ARM::VLD4d16:
4140 case ARM::VLD4d32:
4141 case ARM::VLD1d64Q:
4142 case ARM::VLD4d8_UPD:
4143 case ARM::VLD4d16_UPD:
4144 case ARM::VLD4d32_UPD:
4145 case ARM::VLD1d64Qwb_fixed:
4146 case ARM::VLD1d64Qwb_register:
4147 case ARM::VLD4q8_UPD:
4148 case ARM::VLD4q16_UPD:
4149 case ARM::VLD4q32_UPD:
4150 case ARM::VLD1DUPq8:
4151 case ARM::VLD1DUPq16:
4152 case ARM::VLD1DUPq32:
4153 case ARM::VLD1DUPq8wb_fixed:
4154 case ARM::VLD1DUPq16wb_fixed:
4155 case ARM::VLD1DUPq32wb_fixed:
4156 case ARM::VLD1DUPq8wb_register:
4157 case ARM::VLD1DUPq16wb_register:
4158 case ARM::VLD1DUPq32wb_register:
4159 case ARM::VLD2DUPd8:
4160 case ARM::VLD2DUPd16:
4161 case ARM::VLD2DUPd32:
4162 case ARM::VLD2DUPd8wb_fixed:
4163 case ARM::VLD2DUPd16wb_fixed:
4164 case ARM::VLD2DUPd32wb_fixed:
4165 case ARM::VLD2DUPd8wb_register:
4166 case ARM::VLD2DUPd16wb_register:
4167 case ARM::VLD2DUPd32wb_register:
4168 case ARM::VLD4DUPd8:
4169 case ARM::VLD4DUPd16:
4170 case ARM::VLD4DUPd32:
4171 case ARM::VLD4DUPd8_UPD:
4172 case ARM::VLD4DUPd16_UPD:
4173 case ARM::VLD4DUPd32_UPD:
4174 case ARM::VLD1LNd8:
4175 case ARM::VLD1LNd16:
4176 case ARM::VLD1LNd32:
4177 case ARM::VLD1LNd8_UPD:
4178 case ARM::VLD1LNd16_UPD:
4179 case ARM::VLD1LNd32_UPD:
4180 case ARM::VLD2LNd8:
4181 case ARM::VLD2LNd16:
4182 case ARM::VLD2LNd32:
4183 case ARM::VLD2LNq16:
4184 case ARM::VLD2LNq32:
4185 case ARM::VLD2LNd8_UPD:
4186 case ARM::VLD2LNd16_UPD:
4187 case ARM::VLD2LNd32_UPD:
4188 case ARM::VLD2LNq16_UPD:
4189 case ARM::VLD2LNq32_UPD:
4190 case ARM::VLD4LNd8:
4191 case ARM::VLD4LNd16:
4192 case ARM::VLD4LNd32:
4193 case ARM::VLD4LNq16:
4194 case ARM::VLD4LNq32:
4195 case ARM::VLD4LNd8_UPD:
4196 case ARM::VLD4LNd16_UPD:
4197 case ARM::VLD4LNd32_UPD:
4198 case ARM::VLD4LNq16_UPD:
4199 case ARM::VLD4LNq32_UPD:
4200 // If the address is not 64-bit aligned, the latencies of these
4201 // instructions increase by one.
4202 ++Adjust;
4203 break;
4204 }
4205 }
4206 return Adjust;
4207}
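// Editorial example, not part of the original source: on Swift, an LDRrs
// whose AM2 shifter operand encodes an add with no shift or an lsl of 1-3 is
// credited two cycles by adjustDefLatency; independently, a VLD2d32 whose
// address is not known to be 64-bit aligned is charged one extra cycle on
// cores that check VLDn access alignment.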
4208
4209std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4210 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4211 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4212 // No operand latency. The caller may fall back to getInstrLatency.
4213 if (!ItinData || ItinData->isEmpty())
4214 return std::nullopt;
4215
4216 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4217 Register Reg = DefMO.getReg();
4218
4219 const MachineInstr *ResolvedDefMI = &DefMI;
4220 unsigned DefAdj = 0;
4221 if (DefMI.isBundle())
4222 ResolvedDefMI =
4223 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4224 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4225 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4226 return 1;
4227 }
4228
4229 const MachineInstr *ResolvedUseMI = &UseMI;
4230 unsigned UseAdj = 0;
4231 if (UseMI.isBundle()) {
4232 ResolvedUseMI =
4233 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4234 if (!ResolvedUseMI)
4235 return std::nullopt;
4236 }
4237
4238 return getOperandLatencyImpl(
4239 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4240 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4241}
4242
4243std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4244 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4245 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4246 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4247 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4248 if (Reg == ARM::CPSR) {
4249 if (DefMI.getOpcode() == ARM::FMSTAT) {
4250 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4251 return Subtarget.isLikeA9() ? 1 : 20;
4252 }
4253
4254 // CPSR set and branch can be paired in the same cycle.
4255 if (UseMI.isBranch())
4256 return 0;
4257
4258 // Otherwise it takes the instruction latency (generally one).
4259 unsigned Latency = getInstrLatency(ItinData, DefMI);
4260
4261 // For Thumb2 and -Os, prefer scheduling a CPSR-setting instruction close to
4262 // its uses. Instructions which are otherwise scheduled between them may
4263 // incur a code size penalty (not able to use the CPSR setting 16-bit
4264 // instructions).
4265 if (Latency > 0 && Subtarget.isThumb2()) {
4266 const MachineFunction *MF = DefMI.getParent()->getParent();
4267 if (MF->getFunction().hasOptSize())
4268 --Latency;
4269 }
4270 return Latency;
4271 }
4272
4273 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4274 return std::nullopt;
4275
4276 unsigned DefAlign = DefMI.hasOneMemOperand()
4277 ? (*DefMI.memoperands_begin())->getAlign().value()
4278 : 0;
4279 unsigned UseAlign = UseMI.hasOneMemOperand()
4280 ? (*UseMI.memoperands_begin())->getAlign().value()
4281 : 0;
4282
4283 // Get the itinerary's latency if possible, and handle variable_ops.
4284 std::optional<unsigned> Latency = getOperandLatency(
4285 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4286 // Unable to find operand latency. The caller may resort to getInstrLatency.
4287 if (!Latency)
4288 return std::nullopt;
4289
4290 // Adjust for IT block position.
4291 int Adj = DefAdj + UseAdj;
4292
4293 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4294 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4295 if (Adj >= 0 || (int)*Latency > -Adj) {
4296 return *Latency + Adj;
4297 }
4298 // Return the itinerary latency, which may be zero but not less than zero.
4299 return Latency;
4300}
4301
4302std::optional<unsigned>
4303ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4304 SDNode *DefNode, unsigned DefIdx,
4305 SDNode *UseNode, unsigned UseIdx) const {
4306 if (!DefNode->isMachineOpcode())
4307 return 1;
4308
4309 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4310
4311 if (isZeroCost(DefMCID.Opcode))
4312 return 0;
4313
4314 if (!ItinData || ItinData->isEmpty())
4315 return DefMCID.mayLoad() ? 3 : 1;
4316
4317 if (!UseNode->isMachineOpcode()) {
4318 std::optional<unsigned> Latency =
4319 ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4320 int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4321 int Threshold = 1 + Adj;
4322 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4323 }
4324
4325 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4326 auto *DefMN = cast<MachineSDNode>(DefNode);
4327 unsigned DefAlign = !DefMN->memoperands_empty()
4328 ? (*DefMN->memoperands_begin())->getAlign().value()
4329 : 0;
4330 auto *UseMN = cast<MachineSDNode>(UseNode);
4331 unsigned UseAlign = !UseMN->memoperands_empty()
4332 ? (*UseMN->memoperands_begin())->getAlign().value()
4333 : 0;
4334 std::optional<unsigned> Latency = getOperandLatency(
4335 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4336 if (!Latency)
4337 return std::nullopt;
4338
4339 if (Latency > 1U &&
4340 (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4341 Subtarget.isCortexA7())) {
4342 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4343 // variants are one cycle cheaper.
4344 switch (DefMCID.getOpcode()) {
4345 default: break;
4346 case ARM::LDRrs:
4347 case ARM::LDRBrs: {
4348 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4349 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4350 if (ShImm == 0 ||
4351 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4352 Latency = *Latency - 1;
4353 break;
4354 }
4355 case ARM::t2LDRs:
4356 case ARM::t2LDRBs:
4357 case ARM::t2LDRHs:
4358 case ARM::t2LDRSHs: {
4359 // Thumb2 mode: lsl only.
4360 unsigned ShAmt = DefNode->getConstantOperandVal(2);
4361 if (ShAmt == 0 || ShAmt == 2)
4362 Latency = *Latency - 1;
4363 break;
4364 }
4365 }
4366 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4367 // FIXME: Properly handle all of the latency adjustments for address
4368 // writeback.
4369 switch (DefMCID.getOpcode()) {
4370 default: break;
4371 case ARM::LDRrs:
4372 case ARM::LDRBrs: {
4373 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4374 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4375 if (ShImm == 0 ||
4376 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4377 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4378 Latency = *Latency - 2;
4379 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4380 Latency = *Latency - 1;
4381 break;
4382 }
4383 case ARM::t2LDRs:
4384 case ARM::t2LDRBs:
4385 case ARM::t2LDRHs:
4386 case ARM::t2LDRSHs:
4387 // Thumb2 mode: lsl 0-3 only.
4388 Latency = *Latency - 2;
4389 break;
4390 }
4391 }
4392
4393 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4394 switch (DefMCID.getOpcode()) {
4395 default: break;
4396 case ARM::VLD1q8:
4397 case ARM::VLD1q16:
4398 case ARM::VLD1q32:
4399 case ARM::VLD1q64:
4400 case ARM::VLD1q8wb_register:
4401 case ARM::VLD1q16wb_register:
4402 case ARM::VLD1q32wb_register:
4403 case ARM::VLD1q64wb_register:
4404 case ARM::VLD1q8wb_fixed:
4405 case ARM::VLD1q16wb_fixed:
4406 case ARM::VLD1q32wb_fixed:
4407 case ARM::VLD1q64wb_fixed:
4408 case ARM::VLD2d8:
4409 case ARM::VLD2d16:
4410 case ARM::VLD2d32:
4411 case ARM::VLD2q8Pseudo:
4412 case ARM::VLD2q16Pseudo:
4413 case ARM::VLD2q32Pseudo:
4414 case ARM::VLD2d8wb_fixed:
4415 case ARM::VLD2d16wb_fixed:
4416 case ARM::VLD2d32wb_fixed:
4417 case ARM::VLD2q8PseudoWB_fixed:
4418 case ARM::VLD2q16PseudoWB_fixed:
4419 case ARM::VLD2q32PseudoWB_fixed:
4420 case ARM::VLD2d8wb_register:
4421 case ARM::VLD2d16wb_register:
4422 case ARM::VLD2d32wb_register:
4423 case ARM::VLD2q8PseudoWB_register:
4424 case ARM::VLD2q16PseudoWB_register:
4425 case ARM::VLD2q32PseudoWB_register:
4426 case ARM::VLD3d8Pseudo:
4427 case ARM::VLD3d16Pseudo:
4428 case ARM::VLD3d32Pseudo:
4429 case ARM::VLD1d8TPseudo:
4430 case ARM::VLD1d16TPseudo:
4431 case ARM::VLD1d32TPseudo:
4432 case ARM::VLD1d64TPseudo:
4433 case ARM::VLD1d64TPseudoWB_fixed:
4434 case ARM::VLD1d64TPseudoWB_register:
4435 case ARM::VLD3d8Pseudo_UPD:
4436 case ARM::VLD3d16Pseudo_UPD:
4437 case ARM::VLD3d32Pseudo_UPD:
4438 case ARM::VLD3q8Pseudo_UPD:
4439 case ARM::VLD3q16Pseudo_UPD:
4440 case ARM::VLD3q32Pseudo_UPD:
4441 case ARM::VLD3q8oddPseudo:
4442 case ARM::VLD3q16oddPseudo:
4443 case ARM::VLD3q32oddPseudo:
4444 case ARM::VLD3q8oddPseudo_UPD:
4445 case ARM::VLD3q16oddPseudo_UPD:
4446 case ARM::VLD3q32oddPseudo_UPD:
4447 case ARM::VLD4d8Pseudo:
4448 case ARM::VLD4d16Pseudo:
4449 case ARM::VLD4d32Pseudo:
4450 case ARM::VLD1d8QPseudo:
4451 case ARM::VLD1d16QPseudo:
4452 case ARM::VLD1d32QPseudo:
4453 case ARM::VLD1d64QPseudo:
4454 case ARM::VLD1d64QPseudoWB_fixed:
4455 case ARM::VLD1d64QPseudoWB_register:
4456 case ARM::VLD1q8HighQPseudo:
4457 case ARM::VLD1q8LowQPseudo_UPD:
4458 case ARM::VLD1q8HighTPseudo:
4459 case ARM::VLD1q8LowTPseudo_UPD:
4460 case ARM::VLD1q16HighQPseudo:
4461 case ARM::VLD1q16LowQPseudo_UPD:
4462 case ARM::VLD1q16HighTPseudo:
4463 case ARM::VLD1q16LowTPseudo_UPD:
4464 case ARM::VLD1q32HighQPseudo:
4465 case ARM::VLD1q32LowQPseudo_UPD:
4466 case ARM::VLD1q32HighTPseudo:
4467 case ARM::VLD1q32LowTPseudo_UPD:
4468 case ARM::VLD1q64HighQPseudo:
4469 case ARM::VLD1q64LowQPseudo_UPD:
4470 case ARM::VLD1q64HighTPseudo:
4471 case ARM::VLD1q64LowTPseudo_UPD:
4472 case ARM::VLD4d8Pseudo_UPD:
4473 case ARM::VLD4d16Pseudo_UPD:
4474 case ARM::VLD4d32Pseudo_UPD:
4475 case ARM::VLD4q8Pseudo_UPD:
4476 case ARM::VLD4q16Pseudo_UPD:
4477 case ARM::VLD4q32Pseudo_UPD:
4478 case ARM::VLD4q8oddPseudo:
4479 case ARM::VLD4q16oddPseudo:
4480 case ARM::VLD4q32oddPseudo:
4481 case ARM::VLD4q8oddPseudo_UPD:
4482 case ARM::VLD4q16oddPseudo_UPD:
4483 case ARM::VLD4q32oddPseudo_UPD:
4484 case ARM::VLD1DUPq8:
4485 case ARM::VLD1DUPq16:
4486 case ARM::VLD1DUPq32:
4487 case ARM::VLD1DUPq8wb_fixed:
4488 case ARM::VLD1DUPq16wb_fixed:
4489 case ARM::VLD1DUPq32wb_fixed:
4490 case ARM::VLD1DUPq8wb_register:
4491 case ARM::VLD1DUPq16wb_register:
4492 case ARM::VLD1DUPq32wb_register:
4493 case ARM::VLD2DUPd8:
4494 case ARM::VLD2DUPd16:
4495 case ARM::VLD2DUPd32:
4496 case ARM::VLD2DUPd8wb_fixed:
4497 case ARM::VLD2DUPd16wb_fixed:
4498 case ARM::VLD2DUPd32wb_fixed:
4499 case ARM::VLD2DUPd8wb_register:
4500 case ARM::VLD2DUPd16wb_register:
4501 case ARM::VLD2DUPd32wb_register:
4502 case ARM::VLD2DUPq8EvenPseudo:
4503 case ARM::VLD2DUPq8OddPseudo:
4504 case ARM::VLD2DUPq16EvenPseudo:
4505 case ARM::VLD2DUPq16OddPseudo:
4506 case ARM::VLD2DUPq32EvenPseudo:
4507 case ARM::VLD2DUPq32OddPseudo:
4508 case ARM::VLD3DUPq8EvenPseudo:
4509 case ARM::VLD3DUPq8OddPseudo:
4510 case ARM::VLD3DUPq16EvenPseudo:
4511 case ARM::VLD3DUPq16OddPseudo:
4512 case ARM::VLD3DUPq32EvenPseudo:
4513 case ARM::VLD3DUPq32OddPseudo:
4514 case ARM::VLD4DUPd8Pseudo:
4515 case ARM::VLD4DUPd16Pseudo:
4516 case ARM::VLD4DUPd32Pseudo:
4517 case ARM::VLD4DUPd8Pseudo_UPD:
4518 case ARM::VLD4DUPd16Pseudo_UPD:
4519 case ARM::VLD4DUPd32Pseudo_UPD:
4520 case ARM::VLD4DUPq8EvenPseudo:
4521 case ARM::VLD4DUPq8OddPseudo:
4522 case ARM::VLD4DUPq16EvenPseudo:
4523 case ARM::VLD4DUPq16OddPseudo:
4524 case ARM::VLD4DUPq32EvenPseudo:
4525 case ARM::VLD4DUPq32OddPseudo:
4526 case ARM::VLD1LNq8Pseudo:
4527 case ARM::VLD1LNq16Pseudo:
4528 case ARM::VLD1LNq32Pseudo:
4529 case ARM::VLD1LNq8Pseudo_UPD:
4530 case ARM::VLD1LNq16Pseudo_UPD:
4531 case ARM::VLD1LNq32Pseudo_UPD:
4532 case ARM::VLD2LNd8Pseudo:
4533 case ARM::VLD2LNd16Pseudo:
4534 case ARM::VLD2LNd32Pseudo:
4535 case ARM::VLD2LNq16Pseudo:
4536 case ARM::VLD2LNq32Pseudo:
4537 case ARM::VLD2LNd8Pseudo_UPD:
4538 case ARM::VLD2LNd16Pseudo_UPD:
4539 case ARM::VLD2LNd32Pseudo_UPD:
4540 case ARM::VLD2LNq16Pseudo_UPD:
4541 case ARM::VLD2LNq32Pseudo_UPD:
4542 case ARM::VLD4LNd8Pseudo:
4543 case ARM::VLD4LNd16Pseudo:
4544 case ARM::VLD4LNd32Pseudo:
4545 case ARM::VLD4LNq16Pseudo:
4546 case ARM::VLD4LNq32Pseudo:
4547 case ARM::VLD4LNd8Pseudo_UPD:
4548 case ARM::VLD4LNd16Pseudo_UPD:
4549 case ARM::VLD4LNd32Pseudo_UPD:
4550 case ARM::VLD4LNq16Pseudo_UPD:
4551 case ARM::VLD4LNq32Pseudo_UPD:
4552 // If the address is not 64-bit aligned, the latencies of these
4553 // instructions increase by one.
4554 Latency = *Latency + 1;
4555 break;
4556 }
4557
4558 return Latency;
4559}
4560
4561unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4562 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4563 MI.isImplicitDef())
4564 return 0;
4565
4566 if (MI.isBundle())
4567 return 0;
4568
4569 const MCInstrDesc &MCID = MI.getDesc();
4570
4571 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4572 !Subtarget.cheapPredicableCPSRDef())) {
4573 // When predicated, CPSR is an additional source operand for CPSR-updating
4574 // instructions; this apparently increases their latencies.
4575 return 1;
4576 }
4577 return 0;
4578}
4579
4580unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4581 const MachineInstr &MI,
4582 unsigned *PredCost) const {
4583 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4584 MI.isImplicitDef())
4585 return 1;
4586
4587 // An instruction scheduler typically runs on unbundled instructions, however
4588 // other passes may query the latency of a bundled instruction.
4589 if (MI.isBundle()) {
4590 unsigned Latency = 0;
4591 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4592 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4593 while (++I != E && I->isInsideBundle()) {
4594 if (I->getOpcode() != ARM::t2IT)
4595 Latency += getInstrLatency(ItinData, *I, PredCost);
4596 }
4597 return Latency;
4598 }
4599
4600 const MCInstrDesc &MCID = MI.getDesc();
4601 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4602 !Subtarget.cheapPredicableCPSRDef()))) {
4603 // When predicated, CPSR is an additional source operand for CPSR-updating
4604 // instructions; this apparently increases their latencies.
4605 *PredCost = 1;
4606 }
4607 // Be sure to call getStageLatency for an empty itinerary in case it has a
4608 // valid MinLatency property.
4609 if (!ItinData)
4610 return MI.mayLoad() ? 3 : 1;
4611
4612 unsigned Class = MCID.getSchedClass();
4613
4614 // For instructions with variable uops, use uops as latency.
4615 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4616 return getNumMicroOps(ItinData, MI);
4617
4618 // For the common case, fall back on the itinerary's latency.
4619 unsigned Latency = ItinData->getStageLatency(Class);
4620
4621 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4622 unsigned DefAlign =
4623 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4624 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4625 if (Adj >= 0 || (int)Latency > -Adj) {
4626 return Latency + Adj;
4627 }
4628 return Latency;
4629}
4630
4631unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4632 SDNode *Node) const {
4633 if (!Node->isMachineOpcode())
4634 return 1;
4635
4636 if (!ItinData || ItinData->isEmpty())
4637 return 1;
4638
4639 unsigned Opcode = Node->getMachineOpcode();
4640 switch (Opcode) {
4641 default:
4642 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4643 case ARM::VLDMQIA:
4644 case ARM::VSTMQIA:
4645 return 2;
4646 }
4647}
4648
4649bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4650 const MachineRegisterInfo *MRI,
4651 const MachineInstr &DefMI,
4652 unsigned DefIdx,
4653 const MachineInstr &UseMI,
4654 unsigned UseIdx) const {
4655 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4656 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4657 if (Subtarget.nonpipelinedVFP() &&
4658 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4659 return true;
4660
4661 // Hoist VFP / NEON instructions with 4 or higher latency.
4662 unsigned Latency =
4663 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4664 if (Latency <= 3)
4665 return false;
4666 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4667 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4668}
4669
4670bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4671 const MachineInstr &DefMI,
4672 unsigned DefIdx) const {
4673 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4674 if (!ItinData || ItinData->isEmpty())
4675 return false;
4676
4677 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4678 if (DDomain == ARMII::DomainGeneral) {
4679 unsigned DefClass = DefMI.getDesc().getSchedClass();
4680 std::optional<unsigned> DefCycle =
4681 ItinData->getOperandCycle(DefClass, DefIdx);
4682 return DefCycle && DefCycle <= 2U;
4683 }
4684 return false;
4685}
4686
4687bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4688 StringRef &ErrInfo) const {
4689 if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4690 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4691 return false;
4692 }
4693 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4694 // Make sure we don't generate a lo-lo mov that isn't supported.
4695 if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4696 !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4697 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4698 return false;
4699 }
4700 }
4701 if (MI.getOpcode() == ARM::tPUSH ||
4702 MI.getOpcode() == ARM::tPOP ||
4703 MI.getOpcode() == ARM::tPOP_RET) {
4704 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4705 if (MO.isImplicit() || !MO.isReg())
4706 continue;
4707 Register Reg = MO.getReg();
4708 if (Reg < ARM::R0 || Reg > ARM::R7) {
4709 if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4710 !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4711 ErrInfo = "Unsupported register in Thumb1 push/pop";
4712 return false;
4713 }
4714 }
4715 }
4716 }
4717 if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4718 assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4719 if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4720 MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4721 ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4722 return false;
4723 }
4724 }
4725
4726 // Check the addressing mode by taking the first Imm operand and checking it
4727 // is legal for that mode.
4728 ARMII::AddrMode AddrMode =
4729 (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4730 switch (AddrMode) {
4731 default:
4732 break;
4733 case ARMII::AddrModeT2_i7:
4734 case ARMII::AddrModeT2_i7s2:
4735 case ARMII::AddrModeT2_i7s4:
4736 case ARMII::AddrModeT2_i8:
4737 case ARMII::AddrModeT2_i8pos:
4738 case ARMII::AddrModeT2_i8neg:
4739 case ARMII::AddrModeT2_i8s4:
4740 case ARMII::AddrModeT2_i12: {
4741 uint32_t Imm = 0;
4742 for (auto Op : MI.operands()) {
4743 if (Op.isImm()) {
4744 Imm = Op.getImm();
4745 break;
4746 }
4747 }
4748 if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4749 ErrInfo = "Incorrect AddrMode Imm for instruction";
4750 return false;
4751 }
4752 break;
4753 }
4754 }
4755 return true;
4756}
4757
4758void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4759 unsigned LoadImmOpc,
4760 unsigned LoadOpc) const {
4761 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4762 "ROPI/RWPI not currently supported with stack guard");
4763
4764 MachineBasicBlock &MBB = *MI->getParent();
4765 DebugLoc DL = MI->getDebugLoc();
4766 Register Reg = MI->getOperand(0).getReg();
4767 MachineInstrBuilder MIB;
4768 unsigned int Offset = 0;
4769
4770 if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4771 assert(!Subtarget.isReadTPSoft() &&
4772 "TLS stack protector requires hardware TLS register");
4773
4774 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4775 .addImm(15)
4776 .addImm(0)
4777 .addImm(13)
4778 .addImm(0)
4779 .addImm(3)
4780 .add(predOps(ARMCC::AL));
4781
4782 Module &M = *MBB.getParent()->getFunction().getParent();
4783 Offset = M.getStackProtectorGuardOffset();
4784 if (Offset & ~0xfffU) {
4785 // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4786 // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4787 // bits, resulting in a range of 0 to +1 MiB for the guard offset.
4788 unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4789 BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4790 .addReg(Reg, RegState::Kill)
4791 .addImm(Offset & ~0xfffU)
4792 .add(predOps(ARMCC::AL))
4793 .addReg(0);
4794 Offset &= 0xfffU;
4795 }
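// Editorial example, not part of the original source: with a stack guard
// offset of 0x1234, the block above emits ADD Reg, Reg, #0x1000 and leaves
// Offset = 0x234 for the LDR immediate below; offsets up to 0xfff skip the
// extra ADD entirely.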
4796 } else {
4797 const GlobalValue *GV =
4798 cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4799 bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4800
4801 unsigned TargetFlags = ARMII::MO_NO_FLAG;
4802 if (Subtarget.isTargetMachO()) {
4803 TargetFlags |= ARMII::MO_NONLAZY;
4804 } else if (Subtarget.isTargetCOFF()) {
4805 if (GV->hasDLLImportStorageClass())
4806 TargetFlags |= ARMII::MO_DLLIMPORT;
4807 else if (IsIndirect)
4808 TargetFlags |= ARMII::MO_COFFSTUB;
4809 } else if (IsIndirect) {
4810 TargetFlags |= ARMII::MO_GOT;
4811 }
4812
4813 if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
4814 Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4815 auto APSREncoding =
4816 ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4817 BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
4818 .addImm(APSREncoding)
4819 .add(predOps(ARMCC::AL));
4820 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4821 .addGlobalAddress(GV, 0, TargetFlags);
4822 BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
4823 .addImm(APSREncoding)
4824 .addReg(CPSRSaveReg, RegState::Kill)
4825 .add(predOps(ARMCC::AL));
4826 } else {
4827 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4828 .addGlobalAddress(GV, 0, TargetFlags);
4829 }
4830
4831 if (IsIndirect) {
4832 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4833 MIB.addReg(Reg, RegState::Kill).addImm(0);
4834 auto Flags = MachineMemOperand::MOLoad |
4835 MachineMemOperand::MODereferenceable |
4836 MachineMemOperand::MOInvariant;
4837 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4838 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4839 MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4840 }
4841 }
4842
4843 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4844 MIB.addReg(Reg, RegState::Kill)
4845 .addImm(Offset)
4846 .cloneMemRefs(*MI)
4847 .add(predOps(ARMCC::AL));
4848}
4849
4850bool
4851ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4852 unsigned &AddSubOpc,
4853 bool &NegAcc, bool &HasLane) const {
4854 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4855 if (I == MLxEntryMap.end())
4856 return false;
4857
4858 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4859 MulOpc = Entry.MulOpc;
4860 AddSubOpc = Entry.AddSubOpc;
4861 NegAcc = Entry.NegAcc;
4862 HasLane = Entry.HasLane;
4863 return true;
4864}
4865
4866//===----------------------------------------------------------------------===//
4867// Execution domains.
4868//===----------------------------------------------------------------------===//
4869//
4870// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4871// and some can go down both. The vmov instructions go down the VFP pipeline,
4872// but they can be changed to vorr equivalents that are executed by the NEON
4873// pipeline.
4874//
4875// We use the following execution domain numbering:
4876//
4877enum ARMExeDomain {
4878 ExeGeneric = 0,
4879 ExeVFP = 1,
4880 ExeNEON = 2
4881};
4882
4883//
4884// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4885//
4886std::pair<uint16_t, uint16_t>
4887ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4888 // If we don't have access to NEON instructions then we won't be able
4889 // to swizzle anything to the NEON domain. Check to make sure.
4890 if (Subtarget.hasNEON()) {
4891 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4892 // if they are not predicated.
4893 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4894 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4895
4896 // CortexA9 is particularly picky about mixing the two and wants these
4897 // converted.
4898 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4899 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4900 MI.getOpcode() == ARM::VMOVS))
4901 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4902 }
4903 // No other instructions can be swizzled, so just determine their domain.
4904 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4905
4906 if (Domain & ARMII::DomainNEON)
4907 return std::make_pair(ExeNEON, 0);
4908
4909 // Certain instructions can go either way on Cortex-A8.
4910 // Treat them as NEON instructions.
4911 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4912 return std::make_pair(ExeNEON, 0);
4913
4914 if (Domain & ARMII::DomainVFP)
4915 return std::make_pair(ExeVFP, 0);
4916
4917 return std::make_pair(ExeGeneric, 0);
4918}
4919
4920static MCRegister getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4921 unsigned SReg, unsigned &Lane) {
4922 MCRegister DReg =
4923 TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4924 Lane = 0;
4925
4926 if (DReg)
4927 return DReg;
4928
4929 Lane = 1;
4930 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4931
4932 assert(DReg && "S-register with no D super-register?");
4933 return DReg;
4934}
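// Editorial note, not part of the original source: S and D registers overlap
// pairwise, so the helper above maps S0 to (D0, lane 0), S1 to (D0, lane 1),
// and in general S(2n) / S(2n+1) to lanes 0 / 1 of Dn.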
4935
4936/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4937/// set ImplicitSReg to a register number that must be marked as implicit-use or
4938/// zero if no register needs to be defined as implicit-use.
4939///
4940/// If the function cannot determine if an SPR should be marked implicit use or
4941/// not, it returns false.
4942///
4943/// This function handles cases where an instruction is being modified from taking
4944/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4945/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4946/// lane of the DPR).
4947///
4948/// If the other SPR is defined, an implicit-use of it should be added.
4949/// Otherwise (including the case where the DPR itself is defined), it should not.
4950///
4951static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4952 MachineInstr &MI, MCRegister DReg,
4953 unsigned Lane,
4954 MCRegister &ImplicitSReg) {
4955 // If the DPR is defined or used already, the other SPR lane will be chained
4956 // correctly, so there is nothing to be done.
4957 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4958 ImplicitSReg = MCRegister();
4959 return true;
4960 }
4961
4962 // Otherwise we need to go searching to see if the SPR is set explicitly.
4963 ImplicitSReg = TRI->getSubReg(DReg,
4964 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4965 MachineBasicBlock::LivenessQueryResult LQR =
4966 MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4967
4968 if (LQR == MachineBasicBlock::LQR_Live)
4969 return true;
4970 else if (LQR == MachineBasicBlock::LQR_Unknown)
4971 return false;
4972
4973 // If the register is known not to be live, there is no need to add an
4974 // implicit-use.
4975 ImplicitSReg = MCRegister();
4976 return true;
4977}
4978
4979void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4980 unsigned Domain) const {
4981 unsigned DstReg, SrcReg;
4982 MCRegister DReg;
4983 unsigned Lane;
4984 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4985 const TargetRegisterInfo *TRI = &getRegisterInfo();
4986 switch (MI.getOpcode()) {
4987 default:
4988 llvm_unreachable("cannot handle opcode!");
4989 break;
4990 case ARM::VMOVD:
4991 if (Domain != ExeNEON)
4992 break;
4993
4994 // Zap the predicate operands.
4995 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4996
4997 // Make sure we've got NEON instructions.
4998 assert(Subtarget.hasNEON() && "VORRd requires NEON");
4999
5000 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
5001 DstReg = MI.getOperand(0).getReg();
5002 SrcReg = MI.getOperand(1).getReg();
5003
5004 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5005 MI.removeOperand(i - 1);
5006
5007 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
5008 MI.setDesc(get(ARM::VORRd));
5009 MIB.addReg(DstReg, RegState::Define)
5010 .addReg(SrcReg)
5011 .addReg(SrcReg)
5012 .add(predOps(ARMCC::AL));
5013 break;
5014 case ARM::VMOVRS:
5015 if (Domain != ExeNEON)
5016 break;
5017 assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5018
5019 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5020 DstReg = MI.getOperand(0).getReg();
5021 SrcReg = MI.getOperand(1).getReg();
5022
5023 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5024 MI.removeOperand(i - 1);
5025
5026 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5027
5028 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5029 // Note that DSrc has been widened and the other lane may be undef, which
5030 // contaminates the entire register.
5031 MI.setDesc(get(ARM::VGETLNi32));
5032 MIB.addReg(DstReg, RegState::Define)
5033 .addReg(DReg, RegState::Undef)
5034 .addImm(Lane)
5035 .add(predOps(ARMCC::AL));
5036
5037 // The old source should be an implicit use, otherwise we might think it
5038 // was dead before here.
5039 MIB.addReg(SrcReg, RegState::Implicit);
5040 break;
5041 case ARM::VMOVSR: {
5042 if (Domain != ExeNEON)
5043 break;
5044 assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
5045
5046 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
5047 DstReg = MI.getOperand(0).getReg();
5048 SrcReg = MI.getOperand(1).getReg();
5049
5050 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
5051
5052 MCRegister ImplicitSReg;
5053 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
5054 break;
5055
5056 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5057 MI.removeOperand(i - 1);
5058
5059 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
5060 // Again DDst may be undefined at the beginning of this instruction.
5061 MI.setDesc(get(ARM::VSETLNi32));
5062 MIB.addReg(DReg, RegState::Define)
5063 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
5064 .addReg(SrcReg)
5065 .addImm(Lane)
5066 .add(predOps(ARMCC::AL));
5067
5068 // The narrower destination must be marked as set to keep previous chains
5069 // in place.
5070 MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
5071 if (ImplicitSReg)
5072 MIB.addReg(ImplicitSReg, RegState::Implicit);
5073 break;
5074 }
5075 case ARM::VMOVS: {
5076 if (Domain != ExeNEON)
5077 break;
5078
5079 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
5080 DstReg = MI.getOperand(0).getReg();
5081 SrcReg = MI.getOperand(1).getReg();
5082
5083 unsigned DstLane = 0, SrcLane = 0;
5084 MCRegister DDst, DSrc;
5085 DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
5086 DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
5087
5088 MCRegister ImplicitSReg;
5089 if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
5090 break;
5091
5092 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5093 MI.removeOperand(i - 1);
5094
5095 if (DSrc == DDst) {
5096 // Destination can be:
5097 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
5098 MI.setDesc(get(ARM::VDUPLN32d));
5099 MIB.addReg(DDst, RegState::Define)
5100 .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
5101 .addImm(SrcLane)
5102 .add(predOps(ARMCC::AL));
5103
5104 // Neither the source nor the destination is naturally represented any
5105 // more, so add them in manually.
5106 MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
5107 MIB.addReg(SrcReg, RegState::Implicit);
5108 if (ImplicitSReg)
5109 MIB.addReg(ImplicitSReg, RegState::Implicit);
5110 break;
5111 }
5112
5113 // In general there's no single instruction that can perform an S <-> S
5114 // move in NEON space, but a pair of VEXT instructions *can* do the
5115 // job. It turns out that the VEXTs needed will only use DSrc once, with
5116 // the position based purely on the combination of lane-0 and lane-1
5117 // involved. For example
5118 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5119 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5120 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5121 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5122 //
5123 // Pattern of the MachineInstrs is:
5124 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
5125 MachineInstrBuilder NewMIB;
5126 NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
5127 DDst);
5128
5129 // On the first instruction, both DSrc and DDst may be undef if present.
5130 // Specifically when the original instruction didn't have them as an
5131 // <imp-use>.
5132 MCRegister CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
5133 bool CurUndef = !MI.readsRegister(CurReg, TRI);
5134 NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
5135
5136 CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
5137 CurUndef = !MI.readsRegister(CurReg, TRI);
5138 NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
5139 .addImm(1)
5140 .add(predOps(ARMCC::AL));
5141
5142 if (SrcLane == DstLane)
5143 NewMIB.addReg(SrcReg, RegState::Implicit);
5144
5145 MI.setDesc(get(ARM::VEXTd32));
5146 MIB.addReg(DDst, RegState::Define);
5147
5148 // On the second instruction, DDst has definitely been defined above, so
5149 // it is not undef. DSrc, if present, can be undef as above.
5150 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
5151 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5152 MIB.addReg(CurReg, getUndefRegState(CurUndef));
5153
5154 CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
5155 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5156 MIB.addReg(CurReg, getUndefRegState(CurUndef))
5157 .addImm(1)
5158 .add(predOps(ARMCC::AL));
5159
5160 if (SrcLane != DstLane)
5161 MIB.addReg(SrcReg, RegState::Implicit);
5162
5163 // As before, the original destination is no longer represented; add it
5164 // implicitly.
5165 MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
5166 if (ImplicitSReg != 0)
5167 MIB.addReg(ImplicitSReg, RegState::Implicit);
5168 break;
5169 }
5170 }
5171}
5172
5173//===----------------------------------------------------------------------===//
5174// Partial register updates
5175//===----------------------------------------------------------------------===//
5176//
5177// Swift renames NEON registers with 64-bit granularity. That means any
5178// instruction writing an S-reg implicitly reads the containing D-reg. The
5179// problem is mostly avoided by translating f32 operations to v2f32 operations
5180// on D-registers, but f32 loads are still a problem.
5181//
5182// These instructions can load an f32 into a NEON register:
5183//
5184// VLDRS - Only writes S, partial D update.
5185// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5186// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5187//
5188// FCONSTD can be used as a dependency-breaking instruction.
5189unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
5190 const MachineInstr &MI, unsigned OpNum,
5191 const TargetRegisterInfo *TRI) const {
5192 auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
5193 if (!PartialUpdateClearance)
5194 return 0;
5195
5196 assert(TRI && "Need TRI instance");
5197
5198 const MachineOperand &MO = MI.getOperand(OpNum);
5199 if (MO.readsReg())
5200 return 0;
5201 Register Reg = MO.getReg();
5202 int UseOp = -1;
5203
5204 switch (MI.getOpcode()) {
5205 // Normal instructions writing only an S-register.
5206 case ARM::VLDRS:
5207 case ARM::FCONSTS:
5208 case ARM::VMOVSR:
5209 case ARM::VMOVv8i8:
5210 case ARM::VMOVv4i16:
5211 case ARM::VMOVv2i32:
5212 case ARM::VMOVv2f32:
5213 case ARM::VMOVv1i64:
5214 UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
5215 break;
5216
5217 // Explicitly reads the dependency.
5218 case ARM::VLD1LNd32:
5219 UseOp = 3;
5220 break;
5221 default:
5222 return 0;
5223 }
5224
5225 // If this instruction actually reads a value from Reg, there is no unwanted
5226 // dependency.
5227 if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5228 return 0;
5229
5230 // We must be able to clobber the whole D-reg.
5231 if (Reg.isVirtual()) {
5232 // Virtual register must be a def undef foo:ssub_0 operand.
5233 if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
5234 return 0;
5235 } else if (ARM::SPRRegClass.contains(Reg)) {
5236 // Physical register: MI must define the full D-reg.
5237 MCRegister DReg =
5238 TRI->getMatchingSuperReg(Reg, ARM::ssub_0, &ARM::DPRRegClass);
5239 if (!DReg || !MI.definesRegister(DReg, TRI))
5240 return 0;
5241 }
5242
5243 // MI has an unwanted D-register dependency.
5244 // Avoid defs in the previous N instructions.
5245 return PartialUpdateClearance;
5246}
5247
5248// Break a partial register dependency after getPartialRegUpdateClearance
5249// returned non-zero.
5250void ARMBaseInstrInfo::breakPartialRegDependency(
5251 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5252 assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
5253 assert(TRI && "Need TRI instance");
5254
5255 const MachineOperand &MO = MI.getOperand(OpNum);
5256 Register Reg = MO.getReg();
5257 assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5258 unsigned DReg = Reg;
5259
5260 // If MI defines an S-reg, find the corresponding D super-register.
5261 if (ARM::SPRRegClass.contains(Reg)) {
5262 DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5263 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5264 }
5265
5266 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5267 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5268
5269 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5270 // the full D-register by loading the same value to both lanes. The
5271 // instruction is micro-coded with 2 uops, so don't do this until we can
5272 // properly schedule micro-coded instructions. The dispatcher stalls cause
5273 // regressions that are too large.
5274
5275 // Insert the dependency-breaking FCONSTD before MI.
5276 // 96 is the encoding of 0.5, but the actual value doesn't matter here.
5277 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
5278 .addImm(96)
5279 .add(predOps(ARMCC::AL));
5280 MI.addRegisterKilled(DReg, TRI, true);
5281}
5282
5283bool ARMBaseInstrInfo::hasNOP() const {
5284 return Subtarget.hasFeature(ARM::HasV6KOps);
5285}
5286
5287bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
5288 if (MI->getNumOperands() < 4)
5289 return true;
5290 unsigned ShOpVal = MI->getOperand(3).getImm();
5291 unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
5292 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5293 if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
5294 ((ShImm == 1 || ShImm == 2) &&
5295 ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5296 return true;
5297
5298 return false;
5299}
5300
5301bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
5302 const MachineInstr &MI, unsigned DefIdx,
5303 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5304 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5305 assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5306
5307 switch (MI.getOpcode()) {
5308 case ARM::VMOVDRR:
5309 // dX = VMOVDRR rY, rZ
5310 // is the same as:
5311 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5312 // Populate the InputRegs accordingly.
5313 // rY
5314 const MachineOperand *MOReg = &MI.getOperand(1);
5315 if (!MOReg->isUndef())
5316 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5317 MOReg->getSubReg(), ARM::ssub_0));
5318 // rZ
5319 MOReg = &MI.getOperand(2);
5320 if (!MOReg->isUndef())
5321 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5322 MOReg->getSubReg(), ARM::ssub_1));
5323 return true;
5324 }
5325 llvm_unreachable("Target dependent opcode missing");
5326}
5327
5328bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5329 const MachineInstr &MI, unsigned DefIdx,
5330 RegSubRegPairAndIdx &InputReg) const {
5331 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5332 assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5333
5334 switch (MI.getOpcode()) {
5335 case ARM::VMOVRRD:
5336 // rX, rY = VMOVRRD dZ
5337 // is the same as:
5338 // rX = EXTRACT_SUBREG dZ, ssub_0
5339 // rY = EXTRACT_SUBREG dZ, ssub_1
5340 const MachineOperand &MOReg = MI.getOperand(2);
5341 if (MOReg.isUndef())
5342 return false;
5343 InputReg.Reg = MOReg.getReg();
5344 InputReg.SubReg = MOReg.getSubReg();
5345 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5346 return true;
5347 }
5348 llvm_unreachable("Target dependent opcode missing");
5349}
5350
5351bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5352 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5353 RegSubRegPairAndIdx &InsertedReg) const {
5354 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5355 assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5356
5357 switch (MI.getOpcode()) {
5358 case ARM::VSETLNi32:
5359 case ARM::MVE_VMOV_to_lane_32:
5360 // dX = VSETLNi32 dY, rZ, imm
5361 // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
5362 const MachineOperand &MOBaseReg = MI.getOperand(1);
5363 const MachineOperand &MOInsertedReg = MI.getOperand(2);
5364 if (MOInsertedReg.isUndef())
5365 return false;
5366 const MachineOperand &MOIndex = MI.getOperand(3);
5367 BaseReg.Reg = MOBaseReg.getReg();
5368 BaseReg.SubReg = MOBaseReg.getSubReg();
5369
5370 InsertedReg.Reg = MOInsertedReg.getReg();
5371 InsertedReg.SubReg = MOInsertedReg.getSubReg();
5372 InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
5373 return true;
5374 }
5375 llvm_unreachable("Target dependent opcode missing");
5376}
5377
5378std::pair<unsigned, unsigned>
5379ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5380 const unsigned Mask = ARMII::MO_OPTION_MASK;
5381 return std::make_pair(TF & Mask, TF & ~Mask);
5382}
5383
5384ArrayRef<std::pair<unsigned, const char *>>
5385ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5386 using namespace ARMII;
5387
5388 static const std::pair<unsigned, const char *> TargetFlags[] = {
5389 {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
5390 {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
5391 {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5392 };
5393 return ArrayRef(TargetFlags);
5394}
5395
5396ArrayRef<std::pair<unsigned, const char *>>
5397ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5398 using namespace ARMII;
5399
5400 static const std::pair<unsigned, const char *> TargetFlags[] = {
5401 {MO_COFFSTUB, "arm-coffstub"},
5402 {MO_GOT, "arm-got"},
5403 {MO_SBREL, "arm-sbrel"},
5404 {MO_DLLIMPORT, "arm-dllimport"},
5405 {MO_SECREL, "arm-secrel"},
5406 {MO_NONLAZY, "arm-nonlazy"}};
5407 return ArrayRef(TargetFlags);
5408}
5409
5410std::optional<RegImmPair>
5411ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
5412 int Sign = 1;
5413 unsigned Opcode = MI.getOpcode();
5414 int64_t Offset = 0;
5415
5416 // TODO: Handle cases where Reg is a super- or sub-register of the
5417 // destination register.
5418 const MachineOperand &Op0 = MI.getOperand(0);
5419 if (!Op0.isReg() || Reg != Op0.getReg())
5420 return std::nullopt;
5421
5422 // We describe SUBri or ADDri instructions.
5423 if (Opcode == ARM::SUBri)
5424 Sign = -1;
5425 else if (Opcode != ARM::ADDri)
5426 return std::nullopt;
5427
5428 // TODO: Third operand can be global address (usually some string). Since
5429 // strings can be relocated we cannot calculate their offsets for
5430 // now.
5431 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
5432 return std::nullopt;
5433
5434 Offset = MI.getOperand(2).getImm() * Sign;
5435 return RegImmPair{MI.getOperand(1).getReg(), Offset};
5436}
5437
5438bool llvm::registerDefinedBetween(unsigned Reg,
5439 MachineBasicBlock::iterator From,
5440 MachineBasicBlock::iterator To,
5441 const TargetRegisterInfo *TRI) {
5442 for (auto I = From; I != To; ++I)
5443 if (I->modifiesRegister(Reg, TRI))
5444 return true;
5445 return false;
5446}
5447
5448MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
5449 const TargetRegisterInfo *TRI) {
5450 // Search backwards to the instruction that defines CPSR. This may or may
5451 // not be a CMP; we check that after this loop. If we find another
5452 // instruction that reads CPSR, we return nullptr.
5453 MachineBasicBlock::iterator CmpMI = Br;
5454 while (CmpMI != Br->getParent()->begin()) {
5455 --CmpMI;
5456 if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
5457 break;
5458 if (CmpMI->readsRegister(ARM::CPSR, TRI))
5459 break;
5460 }
5461
5462 // Check that this inst is a CMP r[0-7], #0 and that the register
5463 // is not redefined between the cmp and the br.
5464 if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5465 return nullptr;
5466 Register Reg = CmpMI->getOperand(0).getReg();
5467 Register PredReg;
5468 ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
5469 if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
5470 return nullptr;
5471 if (!isARMLowRegister(Reg))
5472 return nullptr;
5473 if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
5474 return nullptr;
5475
5476 return &*CmpMI;
5477}
5478
5479unsigned llvm::ConstantMaterializationCost(unsigned Val,
5480 const ARMSubtarget *Subtarget,
5481 bool ForCodesize) {
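 // The result is a size in bytes when ForCodesize is set, otherwise an
 // approximate instruction count.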
5482 if (Subtarget->isThumb()) {
5483 if (Val <= 255) // MOV
5484 return ForCodesize ? 2 : 1;
5485 if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
5486 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
5487 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
5488 return ForCodesize ? 4 : 1;
5489 if (Val <= 510) // MOV + ADDi8
5490 return ForCodesize ? 4 : 2;
5491 if (~Val <= 255) // MOV + MVN
5492 return ForCodesize ? 4 : 2;
5493 if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
5494 return ForCodesize ? 4 : 2;
5495 } else {
5496 if (ARM_AM::getSOImmVal(Val) != -1) // MOV
5497 return ForCodesize ? 4 : 1;
5498 if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
5499 return ForCodesize ? 4 : 1;
5500 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
5501 return ForCodesize ? 4 : 1;
5502 if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
5503 return ForCodesize ? 8 : 2;
5504 if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
5505 return ForCodesize ? 8 : 2;
5506 }
5507 if (Subtarget->useMovt()) // MOVW + MOVT
5508 return ForCodesize ? 8 : 2;
5509 return ForCodesize ? 8 : 3; // Literal pool load
5510}
5511
5512bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5513 const ARMSubtarget *Subtarget,
5514 bool ForCodesize) {
5515 // Check with ForCodesize
5516 unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
5517 unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
5518 if (Cost1 < Cost2)
5519 return true;
5520 if (Cost1 > Cost2)
5521 return false;
5522
5523 // If they are equal, try with !ForCodesize
5524 return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
5525 ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
5526}
5527
5528/// Constants defining how certain sequences should be outlined.
5529/// This encompasses how an outlined function should be called, and what kind of
5530/// frame should be emitted for that outlined function.
5531///
5532/// \p MachineOutlinerTailCall implies that the function is being created from
5533/// a sequence of instructions ending in a return.
5534///
5535/// That is,
5536///
5537/// I1 OUTLINED_FUNCTION:
5538/// I2 --> B OUTLINED_FUNCTION I1
5539/// BX LR I2
5540/// BX LR
5541///
5542/// +-------------------------+--------+-----+
5543/// | | Thumb2 | ARM |
5544/// +-------------------------+--------+-----+
5545/// | Call overhead in Bytes | 4 | 4 |
5546/// | Frame overhead in Bytes | 0 | 0 |
5547/// | Stack fixup required | No | No |
5548/// +-------------------------+--------+-----+
5549///
5550/// \p MachineOutlinerThunk implies that the function is being created from
5551/// a sequence of instructions ending in a call. The outlined function is
5552/// called with a BL instruction, and the outlined function tail-calls the
5553/// original call destination.
5554///
5555/// That is,
5556///
5557/// I1 OUTLINED_FUNCTION:
5558/// I2 --> BL OUTLINED_FUNCTION I1
5559/// BL f I2
5560/// B f
5561///
5562/// +-------------------------+--------+-----+
5563/// | | Thumb2 | ARM |
5564/// +-------------------------+--------+-----+
5565/// | Call overhead in Bytes | 4 | 4 |
5566/// | Frame overhead in Bytes | 0 | 0 |
5567/// | Stack fixup required | No | No |
5568/// +-------------------------+--------+-----+
5569///
5570/// \p MachineOutlinerNoLRSave implies that the function should be called using
5571/// a BL instruction, but doesn't require LR to be saved and restored. This
5572/// happens when LR is known to be dead.
5573///
5574/// That is,
5575///
5576/// I1 OUTLINED_FUNCTION:
5577/// I2 --> BL OUTLINED_FUNCTION I1
5578/// I3 I2
5579/// I3
5580/// BX LR
5581///
5582/// +-------------------------+--------+-----+
5583/// | | Thumb2 | ARM |
5584/// +-------------------------+--------+-----+
5585/// | Call overhead in Bytes | 4 | 4 |
5586/// | Frame overhead in Bytes | 2 | 4 |
5587/// | Stack fixup required | No | No |
5588/// +-------------------------+--------+-----+
5589///
5590/// \p MachineOutlinerRegSave implies that the function should be called with a
5591/// save and restore of LR to an available register. This allows us to avoid
5592/// stack fixups. Note that this outlining variant is compatible with the
5593/// NoLRSave case.
5594///
5595/// That is,
5596///
5597/// I1 Save LR OUTLINED_FUNCTION:
5598/// I2 --> BL OUTLINED_FUNCTION I1
5599/// I3 Restore LR I2
5600/// I3
5601/// BX LR
5602///
5603/// +-------------------------+--------+-----+
5604/// | | Thumb2 | ARM |
5605/// +-------------------------+--------+-----+
5606/// | Call overhead in Bytes | 8 | 12 |
5607/// | Frame overhead in Bytes | 2 | 4 |
5608/// | Stack fixup required | No | No |
5609/// +-------------------------+--------+-----+
5610///
5611/// \p MachineOutlinerDefault implies that the function should be called with
5612/// a save and restore of LR to the stack.
5613///
5614/// That is,
5615///
5616/// I1 Save LR OUTLINED_FUNCTION:
5617/// I2 --> BL OUTLINED_FUNCTION I1
5618/// I3 Restore LR I2
5619/// I3
5620/// BX LR
5621///
5622/// +-------------------------+--------+-----+
5623/// | | Thumb2 | ARM |
5624/// +-------------------------+--------+-----+
5625/// | Call overhead in Bytes | 8 | 12 |
5626/// | Frame overhead in Bytes | 2 | 4 |
5627/// | Stack fixup required | Yes | Yes |
5628/// +-------------------------+--------+-----+
5629
5630enum MachineOutlinerClass {
5631 MachineOutlinerTailCall,
5632 MachineOutlinerThunk,
5633 MachineOutlinerNoLRSave,
5634 MachineOutlinerRegSave,
5635 MachineOutlinerDefault
5636};
5637
5638enum MachineOutlinerMBBFlags {
5639 LRUnavailableSomewhere = 0x2,
5640 HasCalls = 0x4,
5641 UnsafeRegsDead = 0x8
5642};
5643
5644struct OutlinerCosts {
5645 int CallTailCall;
5646 int FrameTailCall;
5647 int CallThunk;
5648 int FrameThunk;
5649 int CallNoLRSave;
5650 int FrameNoLRSave;
5651 int CallRegSave;
5652 int FrameRegSave;
5653 int CallDefault;
5654 int FrameDefault;
5655 int SaveRestoreLROnStack;
5656
5657 OutlinerCosts(const ARMSubtarget &target)
5658 : CallTailCall(target.isThumb() ? 4 : 4),
5659 FrameTailCall(target.isThumb() ? 0 : 0),
5660 CallThunk(target.isThumb() ? 4 : 4),
5661 FrameThunk(target.isThumb() ? 0 : 0),
5662 CallNoLRSave(target.isThumb() ? 4 : 4),
5663 FrameNoLRSave(target.isThumb() ? 2 : 4),
5664 CallRegSave(target.isThumb() ? 8 : 12),
5665 FrameRegSave(target.isThumb() ? 2 : 4),
5666 CallDefault(target.isThumb() ? 8 : 12),
5667 FrameDefault(target.isThumb() ? 2 : 4),
5668 SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
5669};
5670
5671Register
5672ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
5673 MachineFunction *MF = C.getMF();
5674 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
5675 const ARMBaseRegisterInfo *ARI =
5676 static_cast<const ARMBaseRegisterInfo *>(&TRI);
5677
5678 BitVector regsReserved = ARI->getReservedRegs(*MF);
5679 // Check if there is an available register across the sequence that we can
5680 // use.
5681 for (Register Reg : ARM::rGPRRegClass) {
5682 if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
5683 Reg != ARM::LR && // LR is not reserved, but don't use it.
5684 Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
5685 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
5686 C.isAvailableInsideSeq(Reg, TRI))
5687 return Reg;
5688 }
5689 return Register();
5690}
5691
5692// Compute liveness of LR at the point after the interval [I, E), which
5693// denotes a *backward* iteration through instructions. Used only for return
5694// basic blocks, which do not end with a tail call.
5695static bool isLRAvailable(const TargetRegisterInfo &TRI,
5696 MachineBasicBlock::reverse_iterator I,
5697 MachineBasicBlock::reverse_iterator E) {
5698 // At the end of the function LR is dead.
5699 bool Live = false;
5700 for (; I != E; ++I) {
5701 const MachineInstr &MI = *I;
5702
5703 // Check defs of LR.
5704 if (MI.modifiesRegister(ARM::LR, &TRI))
5705 Live = false;
5706
5707 // Check uses of LR.
5708 unsigned Opcode = MI.getOpcode();
5709 if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
5710 Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
5711 Opcode == ARM::tBXNS_RET) {
5712 // These instructions use LR, but it's not an (explicit or implicit)
5713 // operand.
5714 Live = true;
5715 continue;
5716 }
5717 if (MI.readsRegister(ARM::LR, &TRI))
5718 Live = true;
5719 }
5720 return !Live;
5721}
5722
5723std::optional<std::unique_ptr<outliner::OutlinedFunction>>
5724ARMBaseInstrInfo::getOutliningCandidateInfo(
5725 const MachineModuleInfo &MMI,
5726 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
5727 unsigned MinRepeats) const {
5728 unsigned SequenceSize = 0;
5729 for (auto &MI : RepeatedSequenceLocs[0])
5730 SequenceSize += getInstSizeInBytes(MI);
5731
5732 // Properties about candidate MBBs that hold for all of them.
5733 unsigned FlagsSetInAll = 0xF;
5734
5735 // Compute liveness information for each candidate, and set FlagsSetInAll.
5736 const TargetRegisterInfo &TRI = getRegisterInfo();
5737 for (outliner::Candidate &C : RepeatedSequenceLocs)
5738 FlagsSetInAll &= C.Flags;
5739
5740 // According to the ARM Procedure Call Standard, the following are
5741 // undefined on entry/exit from a function call:
5742 //
5743 // * Register R12(IP),
5744 // * Condition codes (and thus the CPSR register)
5745 //
5746 // Since we control the instructions which are part of the outlined regions
5747 // we don't need to be fully compliant with the AAPCS, but we have to
5748 // guarantee that if a veneer is inserted at link time the code is still
5749 // correct. Because of this, we can't outline any sequence of instructions
5750 // where one of these registers is live into/across it. Thus, we need to
5751 // delete those candidates.
5752 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5753 // If the unsafe registers in this block are all dead, then we don't need
5754 // to compute liveness here.
5755 if (C.Flags & UnsafeRegsDead)
5756 return false;
5757 return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
5758 };
5759
5760 // Are there any candidates where those registers are live?
5761 if (!(FlagsSetInAll & UnsafeRegsDead)) {
5762 // Erase every candidate that violates the restrictions above. (It could be
5763 // true that we have viable candidates, so it's not worth bailing out in
5764 // the case that, say, 1 out of 20 candidates violates the restrictions.)
5765 llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
5766
5767 // If the sequence doesn't have enough candidates left, then we're done.
5768 if (RepeatedSequenceLocs.size() < MinRepeats)
5769 return std::nullopt;
5770 }
5771
5772 // We expect the majority of the outlining candidates to be in consensus with
5773 // regard to return address sign and authentication, and branch target
5774 // enforcement. In other words, partitioning according to all four
5775 // possible combinations of PAC-RET and BTI is going to yield one big subset
5776 // and three small (likely empty) subsets. That allows us to cull incompatible
5777 // candidates separately for PAC-RET and BTI.
5778
5779 // Partition the candidates in two sets: one with BTI enabled and one with BTI
5780 // disabled. Remove the candidates from the smaller set. If they are the same
5781 // number prefer the non-BTI ones for outlining, since they have less
5782 // overhead.
5783 auto NoBTI =
5784 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5785 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5786 return AFI.branchTargetEnforcement();
5787 });
5788 if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
5789 std::distance(NoBTI, RepeatedSequenceLocs.end()))
5790 RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
5791 else
5792 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
5793
5794 if (RepeatedSequenceLocs.size() < MinRepeats)
5795 return std::nullopt;
5796
5797 // Likewise, partition the candidates according to PAC-RET enablement.
5798 auto NoPAC =
5799 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5800 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5801 // If the function happens to not spill the LR, do not disqualify it
5802 // from the outlining.
5803 return AFI.shouldSignReturnAddress(true);
5804 });
5805 if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
5806 std::distance(NoPAC, RepeatedSequenceLocs.end()))
5807 RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
5808 else
5809 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
5810
5811 if (RepeatedSequenceLocs.size() < MinRepeats)
5812 return std::nullopt;
5813
5814 // At this point, we have only "safe" candidates to outline. Figure out
5815 // frame + call instruction information.
5816
5817 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
5818
5819 // Helper lambda which sets call information for every candidate.
5820 auto SetCandidateCallInfo =
5821 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5822 for (outliner::Candidate &C : RepeatedSequenceLocs)
5823 C.setCallInfo(CallID, NumBytesForCall);
5824 };
5825
5826 OutlinerCosts Costs(Subtarget);
5827
5828 const auto &SomeMFI =
5829 *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
5830 // Adjust costs to account for the BTI instructions.
5831 if (SomeMFI.branchTargetEnforcement()) {
5832 Costs.FrameDefault += 4;
5833 Costs.FrameNoLRSave += 4;
5834 Costs.FrameRegSave += 4;
5835 Costs.FrameTailCall += 4;
5836 Costs.FrameThunk += 4;
5837 }
5838
5839 // Adjust costs to account for sign and authentication instructions.
5840 if (SomeMFI.shouldSignReturnAddress(true)) {
5841 Costs.CallDefault += 8; // +PAC instr, +AUT instr
5842 Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
5843 }
5844
5845 unsigned FrameID = MachineOutlinerDefault;
5846 unsigned NumBytesToCreateFrame = Costs.FrameDefault;
5847
5848 // If the last instruction in any candidate is a terminator, then we should
5849 // tail call all of the candidates.
5850 if (RepeatedSequenceLocs[0].back().isTerminator()) {
5851 FrameID = MachineOutlinerTailCall;
5852 NumBytesToCreateFrame = Costs.FrameTailCall;
5853 SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
5854 } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
5855 LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
5856 LastInstrOpcode == ARM::tBLXr ||
5857 LastInstrOpcode == ARM::tBLXr_noip ||
5858 LastInstrOpcode == ARM::tBLXi) {
5859 FrameID = MachineOutlinerThunk;
5860 NumBytesToCreateFrame = Costs.FrameThunk;
5861 SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
5862 } else {
5863 // We need to decide how to emit calls + frames. We can always emit the same
5864 // frame if we don't need to save to the stack. If we have to save to the
5865 // stack, then we need a different frame.
5866 unsigned NumBytesNoStackCalls = 0;
5867 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5868
5869 for (outliner::Candidate &C : RepeatedSequenceLocs) {
5870 // LR liveness is overestimated in return blocks, unless they end with a
5871 // tail call.
5872 const auto Last = C.getMBB()->rbegin();
5873 const bool LRIsAvailable =
5874 C.getMBB()->isReturnBlock() && !Last->isCall()
5875 ? isLRAvailable(TRI, Last,
5876 (MachineBasicBlock::reverse_iterator)C.begin())
5877 : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
5878 if (LRIsAvailable) {
5879 FrameID = MachineOutlinerNoLRSave;
5880 NumBytesNoStackCalls += Costs.CallNoLRSave;
5881 C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
5882 CandidatesWithoutStackFixups.push_back(C);
5883 }
5884
5885 // Is an unused register available? If so, we won't modify the stack, so
5886 // we can outline with the same frame type as those that don't save LR.
5887 else if (findRegisterToSaveLRTo(C)) {
5888 FrameID = MachineOutlinerRegSave;
5889 NumBytesNoStackCalls += Costs.CallRegSave;
5890 C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
5891 CandidatesWithoutStackFixups.push_back(C);
5892 }
5893
5894 // Is SP used in the sequence at all? If not, we don't have to modify
5895 // the stack, so we are guaranteed to get the same frame.
5896 else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
5897 NumBytesNoStackCalls += Costs.CallDefault;
5898 C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
5899 CandidatesWithoutStackFixups.push_back(C);
5900 }
5901
5902 // If we outline this, we need to modify the stack. Pretend we don't
5903 // outline this by saving all of its bytes.
5904 else
5905 NumBytesNoStackCalls += SequenceSize;
5906 }
5907
5908 // If there are no places where we have to save LR, then note that we don't
5909 // have to update the stack. Otherwise, give every candidate the default
5910 // call type.
5911 if (NumBytesNoStackCalls <=
5912 RepeatedSequenceLocs.size() * Costs.CallDefault) {
5913 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5914 FrameID = MachineOutlinerNoLRSave;
5915 if (RepeatedSequenceLocs.size() < MinRepeats)
5916 return std::nullopt;
5917 } else
5918 SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
5919 }
5920
5921 // Does every candidate's MBB contain a call? If so, then we might have a
5922 // call in the range.
5923 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5924 // check if the range contains a call. These require a save + restore of
5925 // the link register.
5926 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
5927 if (any_of(drop_end(FirstCand),
5928 [](const MachineInstr &MI) { return MI.isCall(); }))
5929 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
5930
5931 // Handle the last instruction separately. If it is a tail call, then the
5932 // last instruction is a call, we don't want to save + restore in this
5933 // case. However, it could be possible that the last instruction is a
5934 // call without it being valid to tail call this sequence. We should
5935 // consider this as well.
5936 else if (FrameID != MachineOutlinerThunk &&
5937 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
5938 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
5939 }
5940
5941 return std::make_unique<outliner::OutlinedFunction>(
5942 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
5943}
5944
5945bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
5946 int64_t Fixup,
5947 bool Updt) const {
5948 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
5949 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
5950 if (SPIdx < 0)
5951 // No SP operand
5952 return true;
5953 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
5954 // If SP is not the base register we can't do much
5955 return false;
5956
5957 // Stack might be involved but addressing mode doesn't handle any offset.
5958 // Note: AddrModeT1_[1|2|4] don't operate on SP
5959 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
5960 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
5961 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
5962 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as based register
5963 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
5964 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
5965 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
5966 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
5967 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
5968 AddrMode == ARMII::AddrModeNone ||
5969 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
5970 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
5971 return false;
5972
5973 unsigned NumOps = MI->getDesc().getNumOperands();
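 // For the addressing modes handled below, the immediate is the
 // third-from-last operand; the final two operands are the predicate.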
5974 unsigned ImmIdx = NumOps - 3;
5975
5976 const MachineOperand &Offset = MI->getOperand(ImmIdx);
5977 assert(Offset.isImm() && "Is not an immediate");
5978 int64_t OffVal = Offset.getImm();
5979
5980 if (OffVal < 0)
5981 // Don't override data if they are below SP.
5982 return false;
5983
5984 unsigned NumBits = 0;
5985 unsigned Scale = 1;
5986
5987 switch (AddrMode) {
5988 case ARMII::AddrMode3:
5989 if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
5990 return false;
5991 OffVal = ARM_AM::getAM3Offset(OffVal);
5992 NumBits = 8;
5993 break;
5994 case ARMII::AddrMode5:
5995 if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
5996 return false;
5997 OffVal = ARM_AM::getAM5Offset(OffVal);
5998 NumBits = 8;
5999 Scale = 4;
6000 break;
6001 case ARMII::AddrMode5FP16:
6002 if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
6003 return false;
6004 OffVal = ARM_AM::getAM5FP16Offset(OffVal);
6005 NumBits = 8;
6006 Scale = 2;
6007 break;
6008 case ARMII::AddrModeT2_i8pos:
6009 NumBits = 8;
6010 break;
6011 case ARMII::AddrModeT2_i8s4:
6012 // FIXME: Values are already scaled in this addressing mode.
6013 assert((Fixup & 3) == 0 && "Can't encode this offset!");
6014 NumBits = 10;
6015 break;
6016 case ARMII::AddrModeT2_ldrex:
6017 NumBits = 8;
6018 Scale = 4;
6019 break;
6020 case ARMII::AddrModeT2_i12:
6021 case ARMII::AddrMode_i12:
6022 NumBits = 12;
6023 break;
6024 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6025 NumBits = 8;
6026 Scale = 4;
6027 break;
6028 default:
6029 llvm_unreachable("Unsupported addressing mode!");
6030 }
6031 // Make sure the offset is encodable for instructions that scale the
6032 // immediate.
6033 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6034 "Can't encode this offset!");
6035 OffVal += Fixup / Scale;
6036
6037 unsigned Mask = (1 << NumBits) - 1;
6038
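 // Only rewrite the offset if the new scaled value still fits in the
 // NumBits-wide immediate field.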
6039 if (OffVal <= Mask) {
6040 if (Updt)
6041 MI->getOperand(ImmIdx).setImm(OffVal);
6042 return true;
6043 }
6044
6045 return false;
6046}
6047
6048void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
6049 Function &F, std::vector<outliner::Candidate> &Candidates) const {
6050 outliner::Candidate &C = Candidates.front();
6051 // branch-target-enforcement is guaranteed to be consistent between all
6052 // candidates, so we only need to look at one.
6053 const Function &CFn = C.getMF()->getFunction();
6054 if (CFn.hasFnAttribute("branch-target-enforcement"))
6055 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
6056
6057 if (CFn.hasFnAttribute("sign-return-address"))
6058 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
6059
6060 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
6061}
6062
6063bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
6064 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6065 const Function &F = MF.getFunction();
6066
6067 // Can F be deduplicated by the linker? If it can, don't outline from it.
6068 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6069 return false;
6070
6071 // Don't outline from functions with section markings; the program could
6072 // expect that all the code is in the named section.
6073 // FIXME: Allow outlining from multiple functions with the same section
6074 // marking.
6075 if (F.hasSection())
6076 return false;
6077
6078 // FIXME: Thumb1 outlining is not handled
6079 if (Subtarget.isThumb1Only())
6080 return false;
6081
6082 // It's safe to outline from MF.
6083 return true;
6084}
6085
6086bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
6087 unsigned &Flags) const {
6088 // Check if LR is available through all of the MBB. If it's not, then set
6089 // a flag.
6090 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6091 "Suitable Machine Function for outlining must track liveness");
6092
6093 LiveRegUnits LRU(getRegisterInfo());
6094
6095 for (MachineInstr &MI : llvm::reverse(MBB))
6096 LRU.accumulate(MI);
6097
6098 // Check if each of the unsafe registers are available...
6099 bool R12AvailableInBlock = LRU.available(ARM::R12);
6100 bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
6101
6102 // If all of these are dead (and not live out), we know we don't have to check
6103 // them later.
6104 if (R12AvailableInBlock && CPSRAvailableInBlock)
6105 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6106
6107 // Now, add the live outs to the set.
6108 LRU.addLiveOuts(MBB);
6109
6110 // If any of these registers is available in the MBB, but also a live out of
6111 // the block, then we know outlining is unsafe.
6112 if (R12AvailableInBlock && !LRU.available(ARM::R12))
6113 return false;
6114 if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
6115 return false;
6116
6117 // Check if there's a call inside this MachineBasicBlock. If there is, then
6118 // set a flag.
6119 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6120 Flags |= MachineOutlinerMBBFlags::HasCalls;
6121
6122 // LR liveness is overestimated in return blocks.
6123
6124 bool LRIsAvailable =
6125 MBB.isReturnBlock() && !MBB.back().isCall()
6126 ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
6127 : LRU.available(ARM::LR);
6128 if (!LRIsAvailable)
6129 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6130
6131 return true;
6132}
6133
6134outliner::InstrType
6135ARMBaseInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
6136 MachineBasicBlock::iterator &MIT,
6137 unsigned Flags) const {
6138 MachineInstr &MI = *MIT;
6139 const TargetRegisterInfo *TRI = &getRegisterInfo();
6140
6141 // PIC instructions contain labels, outlining them would break offset
6142 // computing.
6143 unsigned Opc = MI.getOpcode();
6144 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
6145 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
6146 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
6147 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
6148 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
6149 Opc == ARM::t2MOV_ga_pcrel)
6150 return outliner::InstrType::Illegal;
6151
6152 // Be conservative with ARMv8.1 MVE instructions.
6153 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
6154 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
6155 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
6156 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
6157 Opc == ARM::t2LoopEndDec)
6158 return outliner::InstrType::Illegal;
6159
6160 const MCInstrDesc &MCID = MI.getDesc();
6161 uint64_t MIFlags = MCID.TSFlags;
6162 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
6163 return outliner::InstrType::Illegal;
6164
6165 // Is this a terminator for a basic block?
6166 if (MI.isTerminator())
6167 // TargetInstrInfo::getOutliningType has already filtered out anything
6168 // that would break this, so we can allow it here.
6169 return outliner::InstrType::Legal;
6170
6171 // Don't outline if link register or program counter value are used.
6172 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
6173 return outliner::InstrType::Illegal;
6174
6175 if (MI.isCall()) {
6176 // Get the function associated with the call. Look at each operand and find
6177 // the one that represents the callee and get its name.
6178 const Function *Callee = nullptr;
6179 for (const MachineOperand &MOP : MI.operands()) {
6180 if (MOP.isGlobal()) {
6181 Callee = dyn_cast<Function>(MOP.getGlobal());
6182 break;
6183 }
6184 }
6185
6186 // Don't outline calls to "mcount"-like functions; in particular, Linux
6187 // kernel function tracing relies on it.
6188 if (Callee &&
6189 (Callee->getName() == "\01__gnu_mcount_nc" ||
6190 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
6191 return outliner::InstrType::Illegal;
6192
6193 // If we don't know anything about the callee, assume it depends on the
6194 // stack layout of the caller. In that case, it's only legal to outline
6195 // as a tail-call. Explicitly list the call instructions we know about so
6196 // we don't get unexpected results with call pseudo-instructions.
6197 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6198 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
6199 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
6200 Opc == ARM::tBLXi)
6201 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6202
6203 if (!Callee)
6204 return UnknownCallOutlineType;
6205
6206 // We have a function we have information about. Check if it's something we
6207 // can safely outline.
6208 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
6209
6210 // We don't know what's going on with the callee at all. Don't touch it.
6211 if (!CalleeMF)
6212 return UnknownCallOutlineType;
6213
6214 // Check if we know anything about the callee saves on the function. If we
6215 // don't, then don't touch it, since that implies that we haven't computed
6216 // anything about its stack frame yet.
6217 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6218 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6219 MFI.getNumObjects() > 0)
6220 return UnknownCallOutlineType;
6221
6222 // At this point, we can say that CalleeMF ought to not pass anything on the
6223 // stack. Therefore, we can outline it.
6224 return outliner::InstrType::Legal;
6225 }
6226
6227 // Since calls are handled, don't touch LR or PC
6228 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
6229 return outliner::InstrType::Illegal;
6230
6231 // Does this use the stack?
6232 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
6233 // True if there is no chance that any outlined candidate from this range
6234 // could require stack fixups. That is, both
6235 // * LR is available in the range (No save/restore around call)
6236 // * The range doesn't include calls (No save/restore in outlined frame)
6237 // are true.
6238 // These conditions also ensure correctness of the return address
6239 // authentication - we insert sign and authentication instructions only if
6240 // we save/restore LR on stack, but then this condition ensures that the
6241 // outlined range does not modify the SP, therefore the SP value used for
6242 // signing is the same as the one used for authentication.
6243 // FIXME: This is very restrictive; the flags check the whole block,
6244 // not just the bit we will try to outline.
6245 bool MightNeedStackFixUp =
6246 (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
6247 MachineOutlinerMBBFlags::HasCalls));
6248
6249 if (!MightNeedStackFixUp)
6250 return outliner::InstrType::Legal;
6251
6252 // Any modification of SP will break our code to save/restore LR.
6253 // FIXME: We could handle some instructions which add a constant offset to
6254 // SP, with a bit more work.
6255 if (MI.modifiesRegister(ARM::SP, TRI))
6256 return outliner::InstrType::Illegal;
6257
6258 // At this point, we have a stack instruction that we might need to fix up.
6259 // We'll handle it if it's a load or store.
6260 if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
6261 false))
6262 return outliner::InstrType::Legal;
6263
6264 // We can't fix it up, so don't outline it.
6265 return outliner::InstrType::Illegal;
6266 }
6267
6268 // Be conservative with IT blocks.
6269 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
6270 MI.modifiesRegister(ARM::ITSTATE, TRI))
6271 return outliner::InstrType::Illegal;
6272
6273 // Don't outline CFI instructions.
6274 if (MI.isCFIInstruction())
6275 return outliner::InstrType::Illegal;
6276
6277 return outliner::InstrType::Legal;
6278}
6279
6280void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6281 for (MachineInstr &MI : MBB) {
6282 checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
6283 }
6284}
6285
6286void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
6287 MachineBasicBlock::iterator It, bool CFI,
6288 bool Auth) const {
6289 int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
6290 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
6291 assert(Align >= 8 && Align <= 256);
6292 if (Auth) {
6293 assert(Subtarget.isThumb2());
6294 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
6295 // sequence.
6296 BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
6297 BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
6298 .addReg(ARM::R12, RegState::Kill)
6299 .addReg(ARM::LR, RegState::Kill)
6300 .addReg(ARM::SP)
6301 .addImm(-Align)
6302 .add(predOps(ARMCC::AL))
6303 .setMIFlags(MIFlags);
6304 } else {
6305 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
6306 BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
6307 .addReg(ARM::LR, RegState::Kill)
6308 .addReg(ARM::SP)
6309 .addImm(-Align)
6310 .add(predOps(ARMCC::AL))
6311 .setMIFlags(MIFlags);
6312 }
6313
6314 if (!CFI)
6315 return;
6316
6317 // Add a CFI, saying CFA is offset by Align bytes from SP.
6318 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
6319 CFIBuilder.buildDefCFAOffset(Align);
6320
6321 // Add a CFI saying that the LR that we want to find is now higher than
6322 // before.
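 // With PAC enabled, {R12 (the PAC), LR} are stored as a pair at [SP - Align],
 // so LR sits 4 bytes above the PAC.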
6323 int LROffset = Auth ? Align - 4 : Align;
6324 CFIBuilder.buildOffset(ARM::LR, -LROffset);
6325 if (Auth) {
6326 // Add a CFI for the location of the return address PAC.
6327 CFIBuilder.buildOffset(ARM::RA_AUTH_CODE, -Align);
6328 }
6329}
6330
6331void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
6332 MachineBasicBlock::iterator It,
6333 bool CFI, bool Auth) const {
6334 int Align = Subtarget.getStackAlignment().value();
6335 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
6336 if (Auth) {
6337 assert(Subtarget.isThumb2());
6338 // Restore return address PAC and LR.
6339 BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
6340 .addReg(ARM::R12, RegState::Define)
6341 .addReg(ARM::LR, RegState::Define)
6342 .addReg(ARM::SP, RegState::Define)
6343 .addReg(ARM::SP)
6344 .addImm(Align)
6345 .add(predOps(ARMCC::AL))
6346 .setMIFlags(MIFlags);
6347 // LR authentication is after the CFI instructions, below.
6348 } else {
6349 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6350 MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
6351 .addReg(ARM::SP, RegState::Define)
6352 .addReg(ARM::SP);
6353 if (!Subtarget.isThumb())
6354 MIB.addReg(0);
6355 MIB.addImm(Subtarget.getStackAlignment().value())
6356 .add(predOps(ARMCC::AL))
6357 .setMIFlags(MIFlags);
6358 }
6359
6360 if (CFI) {
6361 // Now stack has moved back up and we have restored LR.
6362 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameDestroy);
6363 CFIBuilder.buildDefCFAOffset(0);
6364 CFIBuilder.buildRestore(ARM::LR);
6365 if (Auth)
6366 CFIBuilder.buildUndefined(ARM::RA_AUTH_CODE);
6367 }
6368
6369 if (Auth)
6370 BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
6371}
6372
6373void ARMBaseInstrInfo::buildOutlinedFrame(
6374 MachineBasicBlock &MBB, MachineFunction &MF,
6375 const outliner::OutlinedFunction &OF) const {
6376 // For thunk outlining, rewrite the last instruction from a call to a
6377 // tail-call.
6378 if (OF.FrameConstructionID == MachineOutlinerThunk) {
6379 MachineInstr *Call = &*--MBB.instr_end();
6380 bool isThumb = Subtarget.isThumb();
6381 unsigned FuncOp = isThumb ? 2 : 0;
6382 unsigned Opc = Call->getOperand(FuncOp).isReg()
6383 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
6384 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
6385 : ARM::tTAILJMPdND
6386 : ARM::TAILJMPd;
6387 MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
6388 .add(Call->getOperand(FuncOp));
6389 if (isThumb && !Call->getOperand(FuncOp).isReg())
6390 MIB.add(predOps(ARMCC::AL));
6391 Call->eraseFromParent();
6392 }
6393
6394 // Is there a call in the outlined range?
6395 auto IsNonTailCall = [](MachineInstr &MI) {
6396 return MI.isCall() && !MI.isReturn();
6397 };
6398 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
6399 MachineBasicBlock::iterator It = MBB.begin();
6400 MachineBasicBlock::iterator Et = MBB.end();
6401
6402 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6403 OF.FrameConstructionID == MachineOutlinerThunk)
6404 Et = std::prev(MBB.end());
6405
6406 // We have to save and restore LR, we need to add it to the liveins if it
6407 // is not already part of the set. This is sufficient since outlined
6408 // functions only have one block.
6409 if (!MBB.isLiveIn(ARM::LR))
6410 MBB.addLiveIn(ARM::LR);
6411
6412 // Insert a save before the outlined region
6413 bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true);
6414 saveLROnStack(MBB, It, true, Auth);
6415
6416 // Fix up the instructions in the range, since we're going to modify the
6417 // stack.
6418 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6419 "Can only fix up stack references once");
6420 fixupPostOutline(MBB);
6421
6422 // Insert a restore before the terminator for the function. Restore LR.
6423 restoreLRFromStack(MBB, Et, true, Auth);
6424 }
6425
6426 // If this is a tail call outlined function, then there's already a return.
6427 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6428 OF.FrameConstructionID == MachineOutlinerThunk)
6429 return;
6430
6431 // Here we have to insert the return ourselves. Get the correct opcode from
6432 // the current feature set.
6433 BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
6434 .add(predOps(ARMCC::AL));
6435
6436 // Did we have to modify the stack by saving the link register?
6437 if (OF.FrameConstructionID != MachineOutlinerDefault &&
6438 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
6439 return;
6440
6441 // We modified the stack.
6442 // Walk over the basic block and fix up all the stack accesses.
6443 fixupPostOutline(MBB);
6444}
6445
6446MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
6447 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6448 MachineFunction &MF, outliner::Candidate &C) const {
6449 MachineInstrBuilder MIB;
6450 MachineBasicBlock::iterator CallPt;
6451 unsigned Opc;
6452 bool isThumb = Subtarget.isThumb();
6453
6454 // Are we tail calling?
6455 if (C.CallConstructionID == MachineOutlinerTailCall) {
6456 // If yes, then we can just branch to the label.
6457 Opc = isThumb
6458 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
6459 : ARM::TAILJMPd;
6460 MIB = BuildMI(MF, DebugLoc(), get(Opc))
6461 .addGlobalAddress(M.getNamedValue(MF.getName()));
6462 if (isThumb)
6463 MIB.add(predOps(ARMCC::AL));
6464 It = MBB.insert(It, MIB);
6465 return It;
6466 }
6467
6468 // Create the call instruction.
6469 Opc = isThumb ? ARM::tBL : ARM::BL;
6470 MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
6471 if (isThumb)
6472 CallMIB.add(predOps(ARMCC::AL));
6473 CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
6474
6475 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6476 C.CallConstructionID == MachineOutlinerThunk) {
6477 // No, so just insert the call.
6478 It = MBB.insert(It, CallMIB);
6479 return It;
6480 }
6481
6482 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6483 // Can we save to a register?
6484 if (C.CallConstructionID == MachineOutlinerRegSave) {
6485 Register Reg = findRegisterToSaveLRTo(C);
6486 assert(Reg != 0 && "No callee-saved register available?");
6487
6488 // Save and restore LR from that register.
6489 copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
6490 if (!AFI.isLRSpilled())
6491 CFIInstBuilder(MBB, It, MachineInstr::FrameSetup)
6492 .buildRegister(ARM::LR, Reg);
6493 CallPt = MBB.insert(It, CallMIB);
6494 copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
6495 if (!AFI.isLRSpilled())
6496 CFIInstBuilder(MBB, It, MachineInstr::FrameDestroy).buildRestore(ARM::LR);
6497 It--;
6498 return CallPt;
6499 }
6500 // We have the default case. Save and restore from SP.
6501 if (!MBB.isLiveIn(ARM::LR))
6502 MBB.addLiveIn(ARM::LR);
6503 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
6504 saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
6505 CallPt = MBB.insert(It, CallMIB);
6506 restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
6507 It--;
6508 return CallPt;
6509}
6510
6511bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
6512 MachineFunction &MF) const {
6513 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
6514}
6515
6516bool ARMBaseInstrInfo::isReMaterializableImpl(
6517 const MachineInstr &MI) const {
6518 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
6519 // the tail predication conversion. This means that the element count
6520 // register has to be live for longer, but that has to be better than
6521 // spill/restore and VPT predication.
6522 return (isVCTP(&MI) && !isPredicated(MI)) ||
6523 TargetInstrInfo::isReMaterializableImpl(MI);
6524}
6525
6526unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
6527 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
6528 : ARM::BLX;
6529}
6530
6531unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
6532 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
6533 : ARM::tBLXr;
6534}
6535
6536unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
6537 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
6538 : ARM::BLX_pred;
6539}
6540
6541namespace {
6542class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
6543 MachineInstr *EndLoop, *LoopCount;
6544 MachineFunction *MF;
6545 const TargetInstrInfo *TII;
6546
6547 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6548 // [LAST_IS_USE] : last reference to register in schedule is a use
6549 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
6550 static int constexpr MAX_STAGES = 30;
6551 static int constexpr LAST_IS_USE = MAX_STAGES;
6552 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
6553 typedef std::bitset<MAX_STAGES + 2> IterNeed;
6554 typedef std::map<Register, IterNeed> IterNeeds;
6555
6556 void bumpCrossIterationPressure(RegPressureTracker &RPT,
6557 const IterNeeds &CIN);
6558 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
6559
6560 // Meaning of the fields for each loop type:
6561 // t2Bcc:
6562 // EndLoop = branch at end of original BB that will become a kernel
6563 // LoopCount = CC setter live into branch
6564 // t2LoopEnd:
6565 // EndLoop = branch at end of original BB
6566 // LoopCount = t2LoopDec
6567public:
6568 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
6569 : EndLoop(EndLoop), LoopCount(LoopCount),
6570 MF(EndLoop->getParent()->getParent()),
6571 TII(MF->getSubtarget().getInstrInfo()) {}
6572
6573 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
6574 // Only ignore the terminator.
6575 return MI == EndLoop || MI == LoopCount;
6576 }
6577
6578 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
6579 if (tooMuchRegisterPressure(SSD, SMS))
6580 return false;
6581
6582 return true;
6583 }
6584
6585 std::optional<bool> createTripCountGreaterCondition(
6586 int TC, MachineBasicBlock &MBB,
6587 SmallVectorImpl<MachineOperand> &Cond) override {
6588
6589 if (isCondBranchOpcode(EndLoop->getOpcode())) {
6590 Cond.push_back(EndLoop->getOperand(1));
6591 Cond.push_back(EndLoop->getOperand(2));
6592 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
6593 TII->reverseBranchCondition(Cond);
6594 }
6595 return {};
6596 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6597 // General case just lets the unrolled t2LoopDec do the subtraction and
6598 // therefore just needs to check if zero has been reached.
6599 MachineInstr *LoopDec = nullptr;
6600 for (auto &I : MBB.instrs())
6601 if (I.getOpcode() == ARM::t2LoopDec)
6602 LoopDec = &I;
6603 assert(LoopDec && "Unable to find copied LoopDec");
6604 // Check if we're done with the loop.
6605 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
6606 .addReg(LoopDec->getOperand(0).getReg())
6607 .addImm(0)
6608 .addImm(ARMCC::AL)
6609 .addReg(ARM::NoRegister);
6610 Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
6611 Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
6612 return {};
6613 } else
6614 llvm_unreachable("Unknown EndLoop");
6615 }
6616
6617 void setPreheader(MachineBasicBlock *NewPreheader) override {}
6618
6619 void adjustTripCount(int TripCountAdjust) override {}
6620};
6621
6622void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
6623 const IterNeeds &CIN) {
6624 // Increase pressure by the amounts in CrossIterationNeeds
6625 for (const auto &N : CIN) {
6626 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6627 for (int I = 0; I < Cnt; ++I)
6628 RPT.increaseRegPressure(Register(N.first), LaneBitmask::getNone(),
6629 LaneBitmask::getAll());
6630 }
6631 // Decrease pressure by the amounts in CrossIterationNeeds
6632 for (const auto &N : CIN) {
6633 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6634 for (int I = 0; I < Cnt; ++I)
6635 RPT.decreaseRegPressure(Register(N.first), LaneBitmask::getAll(),
6636 LaneBitmask::getNone());
6637 }
6638}
6639
6640bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
6641 SMSchedule &SMS) {
6642 IterNeeds CrossIterationNeeds;
6643
6644 // Determine which values will be loop-carried after the schedule is
6645 // applied
6646
6647 for (auto &SU : SSD.SUnits) {
6648 const MachineInstr *MI = SU.getInstr();
6649 int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
6650 for (auto &S : SU.Succs)
6651 if (MI->isPHI() && S.getKind() == SDep::Anti) {
6652 Register Reg = S.getReg();
6653 if (Reg.isVirtual())
6654 CrossIterationNeeds[Reg.id()].set(0);
6655 } else if (S.isAssignedRegDep()) {
6656 int OStg = SMS.stageScheduled(S.getSUnit());
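 // A value defined in stage Stg but consumed in a later stage OStg must
 // remain live across OStg - Stg kernel iterations.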
6657 if (OStg >= 0 && OStg != Stg) {
6658 Register Reg = S.getReg();
6659 if (Reg.isVirtual())
6660 CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1);
6661 }
6662 }
6663 }
6664
6665 // Determine more-or-less what the proposed schedule (reversed) is going to
6666 // be; it might not be quite the same because the within-cycle ordering
6667 // created by SMSchedule depends upon changes to help with address offsets and
6668 // the like.
6669 std::vector<SUnit *> ProposedSchedule;
6670 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6671 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
6672 ++Stage) {
6673 std::deque<SUnit *> Instrs =
6674 SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
6675 std::sort(Instrs.begin(), Instrs.end(),
6676 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6677 llvm::append_range(ProposedSchedule, Instrs);
6678 }
6679
6680 // Learn whether the last use/def of each cross-iteration register is a use or
6681 // def. If it is a def, RegisterPressure will implicitly increase max pressure
6682 // and we do not have to add the pressure.
6683 for (auto *SU : ProposedSchedule)
6684 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6685 ++OperI) {
6686 auto MO = *OperI;
6687 if (!MO.isReg() || !MO.getReg())
6688 continue;
6689 Register Reg = MO.getReg();
6690 auto CIter = CrossIterationNeeds.find(Reg.id());
6691 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6692 CIter->second[SEEN_AS_LIVE])
6693 continue;
6694 if (MO.isDef() && !MO.isDead())
6695 CIter->second.set(SEEN_AS_LIVE);
6696 else if (MO.isUse())
6697 CIter->second.set(LAST_IS_USE);
6698 }
6699 for (auto &CI : CrossIterationNeeds)
6700 CI.second.reset(LAST_IS_USE);
6701
6702 RegionPressure RecRegPressure;
6703 RegPressureTracker RPTracker(RecRegPressure);
6704 RegisterClassInfo RegClassInfo;
6705 RegClassInfo.runOnMachineFunction(*MF);
6706 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
6707 EndLoop->getParent()->end(), false, false);
6708
6709 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6710
6711 for (auto *SU : ProposedSchedule) {
6712 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6713 RPTracker.setPos(std::next(CurInstI));
6714 RPTracker.recede();
6715
6716 // Track what cross-iteration registers would be seen as live
6717 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
6718 auto MO = *OperI;
6719 if (!MO.isReg() || !MO.getReg())
6720 continue;
6721 Register Reg = MO.getReg();
6722 if (MO.isDef() && !MO.isDead()) {
6723 auto CIter = CrossIterationNeeds.find(Reg.id());
6724 if (CIter != CrossIterationNeeds.end()) {
6725 CIter->second.reset(0);
6726 CIter->second.reset(SEEN_AS_LIVE);
6727 }
6728 }
6729 }
6730 for (auto &S : SU->Preds) {
6731 auto Stg = SMS.stageScheduled(SU);
6732 if (S.isAssignedRegDep()) {
6733 Register Reg = S.getReg();
6734 auto CIter = CrossIterationNeeds.find(Reg.id());
6735 if (CIter != CrossIterationNeeds.end()) {
6736 auto Stg2 = SMS.stageScheduled(S.getSUnit());
6737 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6738 if (Stg - Stg2 < MAX_STAGES)
6739 CIter->second.set(Stg - Stg2);
6740 CIter->second.set(SEEN_AS_LIVE);
6741 }
6742 }
6743 }
6744
6745 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6746 }
6747
6748 auto &P = RPTracker.getPressure().MaxSetPressure;
6749 for (unsigned I = 0, E = P.size(); I < E; ++I) {
6750 // Exclude some Neon register classes.
6751 if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 ||
6752 I == ARM::DTriple_with_qsub_0_in_QPR)
6753 continue;
6754
6755 if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
6756 return true;
6757 }
6758 }
6759 return false;
6760}
6761
6762} // namespace
6763
6764std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
6765ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
6766 MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
6767 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
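 // A single-block loop lists itself as a predecessor; in that case the other
 // predecessor is the preheader.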
6768 if (Preheader == LoopBB)
6769 Preheader = *std::next(LoopBB->pred_begin());
6770
6771 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
6772 // If the branch is a Bcc, then the CPSR should be set somewhere within the
6773 // block. We need to determine the reaching definition of CPSR so that
6774 // it can be marked as non-pipelineable, allowing the pipeliner to force
6775 // it into stage 0 or give up if it cannot or will not do so.
6776 MachineInstr *CCSetter = nullptr;
6777 for (auto &L : LoopBB->instrs()) {
6778 if (L.isCall())
6779 return nullptr;
6780 if (isCPSRDefined(L))
6781 CCSetter = &L;
6782 }
6783 if (CCSetter)
6784 return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
6785 else
6786 return nullptr; // Unable to find the CC setter, so unable to guarantee
6787 // that pipeline will work
6788 }
6789
6790 // Recognize:
6791 // preheader:
6792 // %1 = t2DoLoopStart %0
6793 // loop:
6794 // %2 = phi %1, <not loop>, %..., %loop
6795 // %3 = t2LoopDec %2, <imm>
6796 // t2LoopEnd %3, %loop
6797
6798 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
6799 for (auto &L : LoopBB->instrs())
6800 if (L.isCall())
6801 return nullptr;
6802 else if (isVCTP(&L))
6803 return nullptr;
6804 Register LoopDecResult = I->getOperand(0).getReg();
6805 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
6806 MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
6807 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
6808 return nullptr;
6809 MachineInstr *LoopStart = nullptr;
6810 for (auto &J : Preheader->instrs())
6811 if (J.getOpcode() == ARM::t2DoLoopStart)
6812 LoopStart = &J;
6813 if (!LoopStart)
6814 return nullptr;
6815 return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
6816 }
6817 return nullptr;
6818}
const MachineInstrBuilder & AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State) const
bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI) const override
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Returns the size of the specified MachineInstr.
void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MCRegister SrcReg, bool KillSrc, const ARMSubtarget &Subtarget) const
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
ARM supports the MachineOutliner.
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
Enable outlining by default at -Oz.
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const override
bool isPredicated(const MachineInstr &MI) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, unsigned LoadImmOpc, unsigned LoadOpc) const
bool isPredicable(const MachineInstr &MI) const override
isPredicable - Return true if the specified instruction can be predicated.
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
Specialization of TargetInstrInfo::describeLoadedValue, used to enhance debug entry value description...
std::optional< std::unique_ptr< outliner::OutlinedFunction > > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs, unsigned MinRepeats) const override
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, unsigned NumInsts) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
const ARMBaseRegisterInfo & getRegisterInfo() const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to determine if two loads are lo...
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
bool getRegSequenceLikeInputs(const MachineInstr &MI, unsigned DefIdx, SmallVectorImpl< RegSubRegPairAndIdx > &InputRegs) const override
Build the equivalent inputs of a REG_SEQUENCE for the given MI and DefIdx.
unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override
bool getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const override
Build the equivalent inputs of a INSERT_SUBREG for the given MI and DefIdx.
bool expandPostRAPseudo(MachineInstr &MI) const override
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to determine (in conjunction w...
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
VFP/NEON execution domains.
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isFpMLxInstruction(unsigned Opcode) const
isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS instruction.
bool isSwiftFastImmShift(const MachineInstr *MI) const
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
ARMBaseInstrInfo(const ARMSubtarget &STI, const ARMBaseRegisterInfo &TRI)
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void breakPartialRegDependency(MachineInstr &, unsigned, const TargetRegisterInfo *TRI) const override
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
const ARMSubtarget & getSubtarget() const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool analyzeSelect(const MachineInstr &MI, SmallVectorImpl< MachineOperand > &Cond, unsigned &TrueOp, unsigned &FalseOp, bool &Optimizable) const override
bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPairAndIdx &InputReg) const override
Build the equivalent inputs of a EXTRACT_SUBREG for the given MI and DefIdx.
bool shouldSink(const MachineInstr &MI) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMCP::ARMCPModifier getModifier() const
virtual bool hasSameValue(ARMConstantPoolValue *ACPV)
hasSameValue - Return true if this ARM constpool value can share the same constantpool entry as anoth...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
bool isCortexA7() const
bool isSwift() const
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb1Only() const
bool isThumb2() const
bool isLikeA9() const
Align getStackAlignment() const
getStackAlignment - Returns the minimum alignment known to hold of the stack frame on entry to the fu...
bool enableMachinePipeliner() const override
Returns true if machine pipeliner should be enabled.
bool hasMinSize() const
bool isCortexA8() const
@ DoubleIssueCheckUnalignedAccess
Can load/store 2 registers/cycle, but needs an extra cycle if the access is not 64-bit aligned.
@ SingleIssue
Can load/store 1 register/cycle.
@ DoubleIssue
Can load/store 2 registers/cycle.
@ SingleIssuePlusExtras
Can load/store 1 register/cycle, but needs an extra cycle for address computation and potentially als...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type size() const
size - Returns the number of bits in this bitvector.
Definition BitVector.h:178
LLVM_ABI uint64_t scale(uint64_t Num) const
Scale a large integer.
BranchProbability getCompl() const
Helper class for creating CFI instructions and inserting them into MIR.
void buildRegister(MCRegister Reg1, MCRegister Reg2) const
void buildRestore(MCRegister Reg) const
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
A debug info location.
Definition DebugLoc.h:124
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:75
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
bool hasDLLImportStorageClass() const
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
Itinerary data supplied by a subtarget to be used by a target.
int getNumMicroOps(unsigned ItinClassIndx) const
Return the number of micro-ops that the given class decodes to.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
unsigned getStageLatency(unsigned ItinClassIndx) const
Return the total stage latency of the given class.
std::optional< unsigned > getOperandLatency(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Compute and return the use operand latency of a given itinerary class and operand index if the value ...
bool hasPipelineForwarding(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Return true if there is a pipeline forwarding between instructions of itinerary classes DefClass and ...
bool isEmpty() const
Returns true if there are no itineraries.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
Describe properties that are true of each instruction in the target description file.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayLoad() const
Return true if this instruction could possibly read memory.
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
bool isCall() const
Return true if the instruction is a call.
unsigned short Opcode
unsigned getOpcode() const
Return the opcode number for this descriptor.
LLVM_ABI bool hasImplicitDefOfPhysReg(MCRegister Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool isValid() const
isValid - Returns true until all the operands have been visited.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineInstrBundleIterator< MachineInstr > iterator
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
@ LQR_Dead
Register is known to be fully dead.
@ LQR_Live
Register is known to be (at least partially) live.
@ LQR_Unknown
Register liveness not decidable from local neighborhood.
This class is a data container for one entry in a MachineConstantPool.
union llvm::MachineConstantPoolEntry::@004270020304201266316354007027341142157160323045 Val
The constant itself.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
MachineConstantPoolValue * MachineCPVal
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
ArrayRef< MachineMemOperand * >::iterator mmo_iterator
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isCall(QueryType Type=AnyInBundle) const
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI int findFirstPredOperandIdx() const
Find the index of the first operand in the operand list that is used to represent the predicate.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool isRegSequence() const
bool isInsertSubreg() const
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
bool hasOptionalDef(QueryType Type=IgnoreBundle) const
Set if this instruction has an optional definition, e.g.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI void clearKillInfo()
Clears kill flags on all operands.
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
const TargetRegisterInfo * getTargetRegisterInfo() const
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void AddHazardRecognizer(std::unique_ptr< ScheduleHazardRecognizer > &&)
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void increaseRegPressure(VirtRegOrUnit VRegOrUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
LLVM_ABI void decreaseRegPressure(VirtRegOrUnit VRegOrUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition Register.h:60
constexpr unsigned id() const
Definition Register.h:100
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
This class represents the scheduled code.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Scheduling unit. This is a node in the scheduling DAG.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
std::vector< SUnit > SUnits
The scheduling units.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
const InstrItineraryData * getInstrItineraries() const
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Wrapper class representing a virtual register or register unit.
Definition Register.h:181
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
static CondCodes getOppositeCondition(CondCodes CC)
Definition ARMBaseInfo.h:48
ARMII - This namespace holds all of the target specific flags that instruction info tracks.
@ ThumbArithFlagSetting
@ MO_OPTION_MASK
MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects just that part of the flag set.
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
AddrMode
ARM Addressing Modes.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned char getAM5FP16Offset(unsigned AM5Opc)
unsigned getSORegOffset(unsigned Op)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
ShiftOpc getAM2ShiftOpc(unsigned AM2Opc)
unsigned getAM2Offset(unsigned AM2Opc)
unsigned getSOImmValRotate(unsigned Imm)
getSOImmValRotate - Try to handle Imm with an immediate shifter operand, computing the rotate amount ...
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
ShiftOpc getSORegShOp(unsigned Op)
AddrOpc getAM5Op(unsigned AM5Opc)
bool isSOImmTwoPartValNeg(unsigned V)
isSOImmTwoPartValNeg - Return true if the specified value can be obtained by two SOImmVal,...
unsigned getSOImmTwoPartSecond(unsigned V)
getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, return the second chunk of ...
bool isSOImmTwoPartVal(unsigned V)
isSOImmTwoPartVal - Return true if the specified value can be obtained by or'ing together two SOImmVa...
AddrOpc getAM5FP16Op(unsigned AM5Opc)
unsigned getT2SOImmTwoPartSecond(unsigned Imm)
unsigned getT2SOImmTwoPartFirst(unsigned Imm)
bool isT2SOImmTwoPartVal(unsigned Imm)
unsigned char getAM5Offset(unsigned AM5Opc)
unsigned getSOImmTwoPartFirst(unsigned V)
getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal, return the first chunk of it...
AddrOpc getAM2Op(unsigned AM2Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
Define some predicates that are used for node matching.
Definition ARMEHABI.h:25
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
InstrType
Represents how an instruction should be mapped by the outliner.
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
constexpr T rotr(T V, int R)
Definition bit.h:382
static bool isIndirectCall(const MachineInstr &MI)
MachineInstr * findCMPToFoldIntoCBZ(MachineInstr *Br, const TargetRegisterInfo *TRI)
Search backwards from a tBcc to find a tCMPi8 against 0, meaning we can convert them to a tCBZ or tCB...
static bool isCondBranchOpcode(int Opc)
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
static bool isPushOpcode(int Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond)
static bool isVCTP(const MachineInstr *MI)
bool IsCPSRDead< MachineInstr >(const MachineInstr *MI)
unsigned getBLXpredOpcode(const MachineFunction &MF)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
static bool isIndirectBranchOpcode(int Opc)
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI)
Return true if Reg is defd between From and To.
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Op::Description Desc
static bool isSEHInstruction(const MachineInstr &MI)
static bool isCalleeSavedRegister(MCRegister Reg, const MCPhysReg *CSRegs)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:24
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
static bool isJumpTableBranchOpcode(int Opc)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static bool isPopOpcode(int Opc)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond, unsigned Inactive)
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
unsigned getUndefRegState(bool B)
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Register FrameReg, int &Offset, const ARMBaseInstrInfo &TII)
rewriteARMFrameIndex / rewriteT2FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
static bool isIndirectControlFlowNotComingBack(const MachineInstr &MI)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
unsigned getMatchingCondBranchOpcode(unsigned Opc)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
auto partition(R &&Range, UnaryPredicate P)
Provide wrappers to std::partition which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1975
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
static const char * ARMCondCodeToString(ARMCC::CondCodes CC)
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned gettBLXrOpcode(const MachineFunction &MF)
static bool isSpeculationBarrierEndBBOpcode(int Opc)
unsigned getBLXOpcode(const MachineFunction &MF)
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
bool isV8EligibleForIT(const InstrType *Instr)
Definition ARMFeatures.h:24
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materializea des...
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
#define N
ARM_MLxEntry - Record information about MLA / MLS instructions.
Map pseudo instructions that imply an 'S' bit onto real opcodes.
OutlinerCosts(const ARMSubtarget &target)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr LaneBitmask getAll()
Definition LaneBitmask.h:82
static constexpr LaneBitmask getNone()
Definition LaneBitmask.h:81
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
An individual sequence of instructions to be replaced with a call to an outlined function.
The information necessary to create an outlined function for some class of candidate.