LLVM 23.0.0git
ARMBaseInstrInfo.cpp
Go to the documentation of this file.
1//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the Base ARM implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMBaseInstrInfo.h"
14#include "ARMBaseRegisterInfo.h"
16#include "ARMFeatures.h"
17#include "ARMHazardRecognizer.h"
19#include "ARMSubtarget.h"
22#include "MVETailPredUtils.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallSet.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/Module.h"
53#include "llvm/MC/MCAsmInfo.h"
54#include "llvm/MC/MCInstrDesc.h"
59#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <iterator>
67#include <new>
68#include <utility>
69#include <vector>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "arm-instrinfo"
74
75#define GET_INSTRINFO_CTOR_DTOR
76#include "ARMGenInstrInfo.inc"
77
78/// ARM_MLxEntry - Record information about MLA / MLS instructions.
80 uint16_t MLxOpc; // MLA / MLS opcode
81 uint16_t MulOpc; // Expanded multiplication opcode
82 uint16_t AddSubOpc; // Expanded add / sub opcode
83 bool NegAcc; // True if the acc is negated before the add / sub.
84 bool HasLane; // True if instruction has an extra "lane" operand.
85};
86
// Static expansion table for ARM floating-point multiply-accumulate (MLx)
// instructions: each fused MLA/MLS-style opcode is paired with the separate
// multiply and add/subtract opcodes it can be expanded into, whether the
// accumulator is negated before the add/sub, and whether the instruction
// carries an extra SIMD "lane" operand. The constructor below builds the
// MLxEntryMap / MLxHazardOpcodes lookup structures from this table.
 87 static const ARM_MLxEntry ARM_MLxTable[] = {
 88 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
 89 // fp scalar ops
 90 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
 91 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
 92 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
 93 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
 94 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
 95 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
 96 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
 97 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
 98
 99 // fp SIMD ops
 100 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
 101 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
 102 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
 103 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
 104 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
 105 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
 106 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
 107 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
 108};
109
// Constructor body. NOTE(review): the signature line (orig. 110-111) was
// dropped by the extraction of this listing; this is the member-initializer
// list and body. It registers the call-frame pseudo opcodes with the
// TableGen'd base class and builds two lookup structures from ARM_MLxTable:
//  - MLxEntryMap: MLx opcode -> index into ARM_MLxTable (entries must be
//    unique, hence the llvm_unreachable on duplicate insertion).
//  - MLxHazardOpcodes: the set of expanded multiply and add/sub opcodes,
//    consulted by the FP MLx hazard recognizer.
 112 : ARMGenInstrInfo(STI, TRI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
 113 Subtarget(STI) {
 114 for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
 115 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
 116 llvm_unreachable("Duplicated entries?");
 117 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
 118 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
 119 }
 120}
121
 122// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
 123// currently defaults to no prepass hazard recognizer.
// When pre-RA hazard recognition is requested, build a scoreboard recognizer
// driven by the subtarget's instruction itineraries.
// NOTE(review): this listing is truncated — the function signature (orig.
// 124-125) and the fall-through return to the base-class implementation
// (orig. 132) are not visible here; confirm against the upstream source.
 126 const ScheduleDAG *DAG) const {
 127 if (usePreRAHazardRecognizer()) {
 128 const InstrItineraryData *II =
 129 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
 130 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
 131 }
 133}
134
 135// Called during:
 136// - pre-RA scheduling
 137// - post-RA scheduling when FeatureUseMISched is set
// Builds a composite (multi-)hazard recognizer. On Cortex-M7 in post-RA
// scheduling (detected via !DAG->hasVRegLiveness()), a bank-conflict
// recognizer for the TCM banks (banked on bit 2, mask 0x4) is added.
// NOTE(review): several lines (orig. 138, 140, 148, 154) are missing from
// this listing, including the MHR construction and the AddHazardRecognizer
// call wrapping the bank-conflict recognizer.
 139 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
 141
 142 // We would like to restrict this hazard recognizer to only
 143 // post-RA scheduling; we can tell that we're post-RA because we don't
 144 // track VRegLiveness.
 145 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
 146 // banks banked on bit 2. Assume that TCMs are in use.
 147 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
 149 std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
 150
 151 // Not inserting ARMHazardRecognizerFPMLx because that would change
 152 // legacy behavior
 153
 155 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
 156 return MHR;
 157}
158
 159// Called during post-RA scheduling when FeatureUseMISched is not set
// Legacy post-RA path: adds the FP MLx hazard recognizer for Thumb2 /
// VFP2-capable subtargets, then chains the base recognizer (BHR) if present.
// NOTE(review): lines orig. 160-161, 163 and 168 (signature, MHR creation,
// and BHR creation) are missing from this listing.
 162 const ScheduleDAG *DAG) const {
 164
 165 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
 166 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
 167
 169 if (BHR)
 170 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
 171 return MHR;
 172}
173
 174// Branch analysis.
 175// Cond vector output format:
 176// 0 elements indicates an unconditional branch
 177// 2 elements indicates a conditional branch; the elements are
 178// the condition to check and the CPSR.
 179// 3 elements indicates a hardware loop end; the elements
 180// are the opcode, the operand value to test, and a dummy
 181// operand used to pad out to 3 operands.
// Returns false when the block's terminators were fully understood (TBB/FBB/
// Cond filled in), true when analysis had to give up.
// NOTE(review): this listing is truncated — the signature start (orig.
// 182-183, 185), the iterator initialization to MBB.instr_end() (orig. 190),
// the tail of the t2LoopEnd condition (orig. 236) and the erase of the
// redundant branch (orig. 283) are not visible here.
 184 MachineBasicBlock *&FBB,
 186 bool AllowModify) const {
 187 TBB = nullptr;
 188 FBB = nullptr;
 189
 191 if (I == MBB.instr_begin())
 192 return false; // Empty blocks are easy.
 193 --I;
 194
 195 // Walk backwards from the end of the basic block until the branch is
 196 // analyzed or we give up.
 197 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
 198 // Flag to be raised on unanalyzeable instructions. This is useful in cases
 199 // where we want to clean up on the end of the basic block before we bail
 200 // out.
 201 bool CantAnalyze = false;
 202
 203 // Skip over DEBUG values, predicated nonterminators and speculation
 204 // barrier terminators.
 205 while (I->isDebugInstr() || !I->isTerminator() ||
 206 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
 207 I->getOpcode() == ARM::t2DoLoopStartTP){
 208 if (I == MBB.instr_begin())
 209 return false;
 210 --I;
 211 }
 212
 213 if (isIndirectBranchOpcode(I->getOpcode()) ||
 214 isJumpTableBranchOpcode(I->getOpcode())) {
 215 // Indirect branches and jump tables can't be analyzed, but we still want
 216 // to clean up any instructions at the tail of the basic block.
 217 CantAnalyze = true;
 218 } else if (isUncondBranchOpcode(I->getOpcode())) {
 219 TBB = I->getOperand(0).getMBB();
 220 } else if (isCondBranchOpcode(I->getOpcode())) {
 221 // Bail out if we encounter multiple conditional branches.
 222 if (!Cond.empty())
 223 return true;
 224
// A conditional branch seen below an unconditional one: the unconditional
// target becomes the fall-through (FBB) and this branch's target is TBB.
 225 assert(!FBB && "FBB should have been null.");
 226 FBB = TBB;
 227 TBB = I->getOperand(0).getMBB();
 228 Cond.push_back(I->getOperand(1));
 229 Cond.push_back(I->getOperand(2));
 230 } else if (I->isReturn()) {
 231 // Returns can't be analyzed, but we should run cleanup.
 232 CantAnalyze = true;
 233 } else if (I->getOpcode() == ARM::t2LoopEnd &&
 234 MBB.getParent()
 235 ->getSubtarget<ARMSubtarget>()
// Hardware loop end: encoded as the 3-element Cond form documented above.
 237 if (!Cond.empty())
 238 return true;
 239 FBB = TBB;
 240 TBB = I->getOperand(1).getMBB();
 241 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
 242 Cond.push_back(I->getOperand(0));
 243 Cond.push_back(MachineOperand::CreateImm(0));
 244 } else {
 245 // We encountered other unrecognized terminator. Bail out immediately.
 246 return true;
 247 }
 248
 249 // Cleanup code - to be run for unpredicated unconditional branches and
 250 // returns.
 251 if (!isPredicated(*I) &&
 252 (isUncondBranchOpcode(I->getOpcode()) ||
 253 isIndirectBranchOpcode(I->getOpcode()) ||
 254 isJumpTableBranchOpcode(I->getOpcode()) ||
 255 I->isReturn())) {
 256 // Forget any previous condition branch information - it no longer applies.
 257 Cond.clear();
 258 FBB = nullptr;
 259
 260 // If we can modify the function, delete everything below this
 261 // unconditional branch.
 262 if (AllowModify) {
 263 MachineBasicBlock::iterator DI = std::next(I);
 264 while (DI != MBB.instr_end()) {
 265 MachineInstr &InstToDelete = *DI;
 266 ++DI;
 267 // Speculation barriers must not be deleted.
 268 if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
 269 continue;
 270 InstToDelete.eraseFromParent();
 271 }
 272 }
 273 }
 274
 275 if (CantAnalyze) {
 276 // We may not be able to analyze the block, but we could still have
 277 // an unconditional branch as the last instruction in the block, which
 278 // just branches to layout successor. If this is the case, then just
 279 // remove it if we're allowed to make modifications.
 280 if (AllowModify && !isPredicated(MBB.back()) &&
 281 isUncondBranchOpcode(MBB.back().getOpcode()) &&
 282 TBB && MBB.isLayoutSuccessor(TBB))
 284 return true;
 285 }
 286
 287 if (I == MBB.instr_begin())
 288 return false;
 289
 290 --I;
 291 }
 292
 293 // We made it past the terminators without bailing out - we must have
 294 // analyzed this branch successfully.
 295 return false;
 296}
297
// removeBranch: erase up to two trailing branches (an unconditional branch
// and, below it, a conditional branch or t2LoopEnd) from MBB; returns the
// number of instructions removed (0, 1 or 2).
// NOTE(review): the signature start (orig. 298) is missing from this listing.
 299 int *BytesRemoved) const {
 300 assert(!BytesRemoved && "code size not handled");
 301
 302 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
 303 if (I == MBB.end())
 304 return 0;
 305
 306 if (!isUncondBranchOpcode(I->getOpcode()) &&
 307 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
 308 return 0;
 309
 310 // Remove the branch.
 311 I->eraseFromParent();
 312
 313 I = MBB.end();
 314
 315 if (I == MBB.begin()) return 1;
 316 --I;
// A second terminator may remain (conditional branch or hardware loop end);
// anything else means only one instruction was removed.
 317 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
 318 return 1;
 319
 320 // Remove the branch.
 321 I->eraseFromParent();
 322 return 2;
 323}
324
// insertBranch: emit the branch(es) described by TBB/FBB/Cond at the end of
// MBB and return how many instructions were inserted (1 or 2). Cond uses the
// 0/2/3-element encoding documented above analyzeBranch.
// NOTE(review): the signature start (orig. 325-328) and the Thumb
// unconditional-branch build line (orig. 349) are missing from this listing.
 329 const DebugLoc &DL,
 330 int *BytesAdded) const {
 331 assert(!BytesAdded && "code size not handled");
 332 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
// Pick the unconditional/conditional branch opcodes for the current ISA:
// ARM (B/Bcc), Thumb2 (t2B/t2Bcc) or Thumb1 (tB/tBcc).
 333 int BOpc = !AFI->isThumbFunction()
 334 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
 335 int BccOpc = !AFI->isThumbFunction()
 336 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
 337 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
 338
 339 // Shouldn't be a fall through.
 340 assert(TBB && "insertBranch must not be told to insert a fallthrough");
 341 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
 342 "ARM branch conditions have two or three components!");
 343
 344 // For conditional branches, we use addOperand to preserve CPSR flags.
 345
 346 if (!FBB) {
 347 if (Cond.empty()) { // Unconditional branch?
 348 if (isThumb)
 350 else
 351 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
 352 } else if (Cond.size() == 2) {
 353 BuildMI(&MBB, DL, get(BccOpc))
 354 .addMBB(TBB)
 355 .addImm(Cond[0].getImm())
 356 .add(Cond[1]);
 357 } else
// 3-element form: Cond[0] holds the hardware-loop-end opcode itself.
 358 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
 359 return 1;
 360 }
 361
 362 // Two-way conditional branch.
 363 if (Cond.size() == 2)
 364 BuildMI(&MBB, DL, get(BccOpc))
 365 .addMBB(TBB)
 366 .addImm(Cond[0].getImm())
 367 .add(Cond[1]);
 368 else if (Cond.size() == 3)
 369 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
 370 if (isThumb)
 371 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
 372 else
 373 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
 374 return 2;
}
376
// reverseBranchCondition: invert a 2-element condition in place; returns
// false on success. The 3-element hardware-loop form cannot be reversed, so
// any other Cond size returns true (failure).
// NOTE(review): the signature lines (orig. 377-378) are missing from this
// listing.
 379 if (Cond.size() == 2) {
 380 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
 381 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
 382 return false;
 383 }
 384 return true;
 385}
386
// isPredicated: an instruction is predicated when its first predicate
// operand is a condition code other than AL. For a bundle, any predicated
// instruction inside the bundle makes the whole bundle predicated.
// NOTE(review): the signature (orig. 387) and the bundle iterator
// initialization (orig. 389) are missing from this listing.
 388 if (MI.isBundle()) {
 390 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
 391 while (++I != E && I->isInsideBundle()) {
 392 int PIdx = I->findFirstPredOperandIdx();
 393 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
 394 return true;
 395 }
 396 return false;
 397 }
 398
 399 int PIdx = MI.findFirstPredOperandIdx();
 400 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
 401}
402
// createMIROperandComment: produce a human-readable comment for an operand
// in MIR dumps. Falls back to the target-independent comment first; after
// that, only predicate immediates get a comment, rendered as "CC::<cond>".
// NOTE(review): the signature start (orig. 402-403) and the call computing
// GenericComment (orig. 409) are missing from this listing.
 404 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
 405 const TargetRegisterInfo *TRI) const {
 406
 407 // First, let's see if there is a generic comment for this operand
 408 std::string GenericComment =
 410 if (!GenericComment.empty())
 411 return GenericComment;
 412
 413 // If not, check if we have an immediate operand.
 414 if (!Op.isImm())
 415 return std::string();
 416
 417 // And print its corresponding condition code if the immediate is a
 418 // predicate.
 419 int FirstPredOp = MI.findFirstPredOperandIdx();
 420 if (FirstPredOp != (int) OpIdx)
 421 return std::string();
 422
 423 std::string CC = "CC::";
 424 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
 425 return CC;
 426}
427
// PredicateInstruction: rewrite MI's predicate operands to the condition in
// Pred (imm + CPSR register); returns true if MI was predicated.
// NOTE(review): the signature (orig. 427-429) and the special-case condition
// guarding the first builder (orig. 431-432, presumably for unconditional
// branch opcodes) are missing from this listing.
 430 unsigned Opc = MI.getOpcode();
 433 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
 434 .addImm(Pred[0].getImm())
 435 .addReg(Pred[1].getReg());
 436 return true;
 437 }
 438
 439 int PIdx = MI.findFirstPredOperandIdx();
 440 if (PIdx != -1) {
// Overwrite the existing predicate immediate and its CPSR operand.
 441 MachineOperand &PMO = MI.getOperand(PIdx);
 442 PMO.setImm(Pred[0].getImm());
 443 MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
 444
 445 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
 446 // IT block. This affects how they are printed.
 447 const MCInstrDesc &MCID = MI.getDesc();
 448 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
 449 assert(MCID.operands()[1].isOptionalDef() &&
 450 "CPSR def isn't expected operand");
 451 assert((MI.getOperand(1).isDead() ||
 452 MI.getOperand(1).getReg() != ARM::CPSR) &&
 453 "if conversion tried to stop defining used CPSR");
// Drop the optional CPSR def: inside an IT block the flags are not written.
 454 MI.getOperand(1).setReg(ARM::NoRegister);
 455 }
 456
 457 return true;
 458 }
 459 return false;
 460}
461
// SubsumesPredicate: returns true when the first predicate (Pred1) is true
// whenever the second (Pred2) is — i.e. condition CC1 implies being
// satisfied for CC2's cases listed below (AL subsumes everything; HS covers
// HI; LS covers LO and EQ; GE covers GT; LE covers LT).
// NOTE(review): the signature start (orig. 461-462) is missing from this
// listing.
 463 ArrayRef<MachineOperand> Pred2) const {
 464 if (Pred1.size() > 2 || Pred2.size() > 2)
 465 return false;
 466
 467 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
 468 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
 469 if (CC1 == CC2)
 470 return true;
 471
 472 switch (CC1) {
 473 default:
 474 return false;
 475 case ARMCC::AL:
 476 return true;
 477 case ARMCC::HS:
 478 return CC2 == ARMCC::HI;
 479 case ARMCC::LS:
 480 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
 481 case ARMCC::GE:
 482 return CC2 == ARMCC::GT;
 483 case ARMCC::LE:
 484 return CC2 == ARMCC::LT;
 485 }
}
487
// ClobbersPredicate: collect into Pred every operand of MI that defines or
// clobbers CPSR (explicit defs or register-mask clobbers); returns true if
// any were found. With SkipDead set, dead CPSR defs on Thumb1
// flag-setting arithmetic are ignored so such instructions can still be
// placed inside IT blocks.
// NOTE(review): the signature start (orig. 487-488) is missing from this
// listing.
 489 std::vector<MachineOperand> &Pred,
 490 bool SkipDead) const {
 491 bool Found = false;
 492 for (const MachineOperand &MO : MI.operands()) {
 493 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
 494 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
 495 if (ClobbersCPSR || IsCPSR) {
 496
 497 // Filter out T1 instructions that have a dead CPSR,
 498 // allowing IT blocks to be generated containing T1 instructions
 499 const MCInstrDesc &MCID = MI.getDesc();
 500 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
 501 SkipDead)
 502 continue;
 503
 504 Pred.push_back(MO);
 505 Found = true;
 506 }
 507 }
 508
 509 return Found;
 510}
511
// isCPSRDefined: true iff MI has a live (non-dead) explicit def of CPSR.
// NOTE(review): the signature (orig. 512) is missing from this listing.
 513 for (const auto &MO : MI.operands())
 514 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
 515 return true;
 516 return false;
 517}
518
// isEligibleForITBlock: Thumb1 flag-setting arithmetic instructions (listed
// below) are only IT-eligible under an extra condition; every other opcode
// is eligible unconditionally.
// NOTE(review): the signature (orig. 518-519) and the shared return for the
// listed cases (orig. 544 — upstream returns !isCPSRDefined(*MI)) are
// missing from this listing.
 520 switch (MI->getOpcode()) {
 521 default: return true;
 522 case ARM::tADC: // ADC (register) T1
 523 case ARM::tADDi3: // ADD (immediate) T1
 524 case ARM::tADDi8: // ADD (immediate) T2
 525 case ARM::tADDrr: // ADD (register) T1
 526 case ARM::tAND: // AND (register) T1
 527 case ARM::tASRri: // ASR (immediate) T1
 528 case ARM::tASRrr: // ASR (register) T1
 529 case ARM::tBIC: // BIC (register) T1
 530 case ARM::tEOR: // EOR (register) T1
 531 case ARM::tLSLri: // LSL (immediate) T1
 532 case ARM::tLSLrr: // LSL (register) T1
 533 case ARM::tLSRri: // LSR (immediate) T1
 534 case ARM::tLSRrr: // LSR (register) T1
 535 case ARM::tMUL: // MUL T1
 536 case ARM::tMVN: // MVN (register) T1
 537 case ARM::tORR: // ORR (register) T1
 538 case ARM::tROR: // ROR (register) T1
 539 case ARM::tRSB: // RSB (immediate) T1
 540 case ARM::tSBC: // SBC (register) T1
 541 case ARM::tSUBi3: // SUB (immediate) T1
 542 case ARM::tSUBi8: // SUB (immediate) T2
 543 case ARM::tSUBrr: // SUB (register) T1
 545 }
 546}
547
 548/// isPredicable - Return true if the specified instruction can be predicated.
 549/// By default, this returns true for every instruction with a
 550/// PredicateOperand.
// Refinements over the default: bundles, NEON-domain instructions, and (when
// SLS hardening is on) indirect control flow are never predicable; with
// ARMv8 restricted-IT, Thumb2 eligibility is delegated to isV8EligibleForIT.
// NOTE(review): the signature (orig. 551), one guard condition (orig. 558)
// and the AFI initializer expression (orig. 563) are missing from this
// listing.
 552 if (!MI.isPredicable())
 553 return false;
 554
 555 if (MI.isBundle())
 556 return false;
 557
 559 return false;
 560
 561 const MachineFunction *MF = MI.getParent()->getParent();
 562 const ARMFunctionInfo *AFI =
 564
 565 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
 566 // In their ARM encoding, they can't be encoded in a conditional form.
 567 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
 568 return false;
 569
 570 // Make indirect control flow changes unpredictable when SLS mitigation is
 571 // enabled.
 572 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
 573 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
 574 return false;
 575 if (ST.hardenSlsBlr() && isIndirectCall(MI))
 576 return false;
 577
 578 if (AFI->isThumb2Function()) {
 579 if (getSubtarget().restrictIT())
 580 return isV8EligibleForIT(&MI);
 581 }
 582
 583 return true;
 584}
585
586namespace llvm {
587
588template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
589 for (const MachineOperand &MO : MI->operands()) {
590 if (!MO.isReg() || MO.isUndef() || MO.isUse())
591 continue;
592 if (MO.getReg() != ARM::CPSR)
593 continue;
594 if (!MO.isDead())
595 return false;
596 }
597 // all definitions of CPSR are dead
598 return true;
599}
600
601} // end namespace llvm
602
 603/// GetInstSize - Return the size of the specified MachineInstr.
 604///
// Dispatches on opcode: TableGen size by default, with special cases for
// bundles, COPY pseudos, constant-pool/jump-table entries (size stored in an
// operand), SPACE pseudos, and inline asm (measured from its string).
// NOTE(review): the signature (orig. 605), the COPY condition (orig. 623 —
// presumably a Thumb1-vs-other check selecting 4 vs 2 bytes) and the inline
// asm alignment condition (orig. 641) are missing from this listing.
 606 const MachineBasicBlock &MBB = *MI.getParent();
 607 const MachineFunction *MF = MBB.getParent();
 608 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
 609
 610 const MCInstrDesc &MCID = MI.getDesc();
 611
 612 switch (MI.getOpcode()) {
 613 default:
 614 // Return the size specified in .td file. If there's none, return 0, as we
 615 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
 616 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
 617 // contrast to AArch64 instructions which have a default size of 4 bytes for
 618 // example.
 619 return MCID.getSize();
 620 case TargetOpcode::BUNDLE:
 621 return getInstBundleSize(MI);
 622 case TargetOpcode::COPY:
 624 return 4;
 625 else
 626 return 2;
 627 case ARM::CONSTPOOL_ENTRY:
 628 case ARM::JUMPTABLE_INSTS:
 629 case ARM::JUMPTABLE_ADDRS:
 630 case ARM::JUMPTABLE_TBB:
 631 case ARM::JUMPTABLE_TBH:
 632 // If this machine instr is a constant pool entry, its size is recorded as
 633 // operand #2.
 634 return MI.getOperand(2).getImm();
 635 case ARM::SPACE:
 636 return MI.getOperand(1).getImm();
 637 case ARM::INLINEASM:
 638 case ARM::INLINEASM_BR: {
 639 // If this machine instr is an inline asm, measure it.
 640 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
 642 Size = alignTo(Size, 4);
 643 return Size;
 644 }
 645 }
 646}
647
// copyFromCPSR: materialize the current program status into DestReg with an
// MRS-family instruction (t2MRS_M / t2MRS_AR / MRS by subtarget), marking
// CPSR as an implicit use (killed if requested).
// NOTE(review): the signature start (orig. 648-649) and the MIB declaration
// prefix (orig. 656) are missing from this listing.
 650 MCRegister DestReg, bool KillSrc,
 651 const ARMSubtarget &Subtarget) const {
 652 unsigned Opc = Subtarget.isThumb()
 653 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
 654 : ARM::MRS;
 655
 657 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
 658
 659 // There is only 1 A/R class MRS instruction, and it always refers to
 660 // APSR. However, there are lots of other possibilities on M-class cores.
 661 if (Subtarget.isMClass())
 662 MIB.addImm(0x800);
 663
 664 MIB.add(predOps(ARMCC::AL))
 665 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
 666}
667
// copyToCPSR: write SrcReg into the program status register with an
// MSR-family instruction (t2MSR_M / t2MSR_AR / MSR by subtarget). The
// immediate selects the status fields: 0x800 on M-class, mask 8 otherwise.
// NOTE(review): the signature start (orig. 668-669) and the builder tail
// (orig. 684-685, predicate ops and implicit CPSR def) are missing from this
// listing.
 670 MCRegister SrcReg, bool KillSrc,
 671 const ARMSubtarget &Subtarget) const {
 672 unsigned Opc = Subtarget.isThumb()
 673 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
 674 : ARM::MSR;
 675
 676 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
 677
 678 if (Subtarget.isMClass())
 679 MIB.addImm(0x800);
 680 else
 681 MIB.addImm(8);
 682
 683 MIB.addReg(SrcReg, getKillRegState(KillSrc))
 686}
687
// Helpers appending MVE vector-predication (vpred) operands to a builder.
// NOTE(review): the signatures of all three helpers (orig. 688, 694-700,
// 706, 708) are partially missing from this listing; the visible bodies are
// the unpredicated form (ARMVCC::None + empty regs + tp_reg), the predicated
// form (condition + implicit VPR use + tp_reg), and the vpred_r variant that
// additionally supplies an inactive-lanes source register.
 689 MIB.addImm(ARMVCC::None);
 690 MIB.addReg(0);
 691 MIB.addReg(0); // tp_reg
 692}
 693
 699
 701 MIB.addImm(Cond);
 702 MIB.addReg(ARM::VPR, RegState::Implicit);
 703 MIB.addReg(0); // tp_reg
 704}
 705
 707 unsigned Cond, unsigned Inactive) {
 709 MIB.addReg(Inactive);
 710}
711
// copyPhysReg: emit the cheapest register-to-register copy for any pair of
// ARM physical registers. Single-instruction cases (GPR MOVr, VFP VMOVS/
// VMOVRS/VMOVSR/VMOVD, NEON VORRq / MVE MQPRCopy) are handled first; larger
// tuple classes are decomposed into per-subregister moves; CPSR, VPR and
// FPSCR_NZCV get dedicated MRS/MSR/VMSR/VMRS sequences.
// NOTE(review): this listing is truncated — among others the signature start
// (orig. 712-713), several builder-chain predicate lines (orig. 724, 817,
// 823, 829, 835), the TRI/Mov declarations (orig. 841-842) and the MVE
// predicated-op line (orig. 867) are missing.
 714 const DebugLoc &DL, Register DestReg,
 715 Register SrcReg, bool KillSrc,
 716 bool RenamableDest,
 717 bool RenamableSrc) const {
 718 bool GPRDest = ARM::GPRRegClass.contains(DestReg);
 719 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
 720
 721 if (GPRDest && GPRSrc) {
 722 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
 723 .addReg(SrcReg, getKillRegState(KillSrc))
 725 .add(condCodeOp());
 726 return;
 727 }
 728
 729 bool SPRDest = ARM::SPRRegClass.contains(DestReg);
 730 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
 731
 732 unsigned Opc = 0;
 733 if (SPRDest && SPRSrc)
 734 Opc = ARM::VMOVS;
 735 else if (GPRDest && SPRSrc)
 736 Opc = ARM::VMOVRS;
 737 else if (SPRDest && GPRSrc)
 738 Opc = ARM::VMOVSR;
 739 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
 740 Opc = ARM::VMOVD;
 741 else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
 742 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
 743
 744 if (Opc) {
 745 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
 746 MIB.addReg(SrcReg, getKillRegState(KillSrc));
// VORR encodes a copy as "dst = src | src", hence the second source operand.
 747 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
 748 MIB.addReg(SrcReg, getKillRegState(KillSrc));
 749 if (Opc == ARM::MVE_VORR)
 750 addUnpredicatedMveVpredROp(MIB, DestReg);
 751 else if (Opc != ARM::MQPRCopy)
 752 MIB.add(predOps(ARMCC::AL));
 753 return;
 754 }
 755
 756 // Handle register classes that require multiple instructions.
 757 unsigned BeginIdx = 0;
 758 unsigned SubRegs = 0;
 759 int Spacing = 1;
 760
 761 // Use VORRq when possible.
 762 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
 763 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
 764 BeginIdx = ARM::qsub_0;
 765 SubRegs = 2;
 766 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
 767 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
 768 BeginIdx = ARM::qsub_0;
 769 SubRegs = 4;
 770 // Fall back to VMOVD.
 771 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
 772 Opc = ARM::VMOVD;
 773 BeginIdx = ARM::dsub_0;
 774 SubRegs = 2;
 775 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
 776 Opc = ARM::VMOVD;
 777 BeginIdx = ARM::dsub_0;
 778 SubRegs = 3;
 779 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
 780 Opc = ARM::VMOVD;
 781 BeginIdx = ARM::dsub_0;
 782 SubRegs = 4;
 783 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
 784 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
 785 BeginIdx = ARM::gsub_0;
 786 SubRegs = 2;
 787 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
// "Spc" classes use every other D register, hence Spacing = 2.
 788 Opc = ARM::VMOVD;
 789 BeginIdx = ARM::dsub_0;
 790 SubRegs = 2;
 791 Spacing = 2;
 792 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
 793 Opc = ARM::VMOVD;
 794 BeginIdx = ARM::dsub_0;
 795 SubRegs = 3;
 796 Spacing = 2;
 797 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
 798 Opc = ARM::VMOVD;
 799 BeginIdx = ARM::dsub_0;
 800 SubRegs = 4;
 801 Spacing = 2;
 802 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
 803 !Subtarget.hasFP64()) {
// No double-precision moves without FP64: copy the two S halves instead.
 804 Opc = ARM::VMOVS;
 805 BeginIdx = ARM::ssub_0;
 806 SubRegs = 2;
 807 } else if (SrcReg == ARM::CPSR) {
 808 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
 809 return;
 810 } else if (DestReg == ARM::CPSR) {
 811 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
 812 return;
 813 } else if (DestReg == ARM::VPR) {
 814 assert(ARM::GPRRegClass.contains(SrcReg));
 815 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
 816 .addReg(SrcReg, getKillRegState(KillSrc))
 818 return;
 819 } else if (SrcReg == ARM::VPR) {
 820 assert(ARM::GPRRegClass.contains(DestReg));
 821 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
 822 .addReg(SrcReg, getKillRegState(KillSrc))
 824 return;
 825 } else if (DestReg == ARM::FPSCR_NZCV) {
 826 assert(ARM::GPRRegClass.contains(SrcReg));
 827 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
 828 .addReg(SrcReg, getKillRegState(KillSrc))
 830 return;
 831 } else if (SrcReg == ARM::FPSCR_NZCV) {
 832 assert(ARM::GPRRegClass.contains(DestReg));
 833 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
 834 .addReg(SrcReg, getKillRegState(KillSrc))
 836 return;
 837 }
 838
 839 assert(Opc && "Impossible reg-to-reg copy");
 840
 843
 844 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
 845 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
 846 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
 847 Spacing = -Spacing;
 848 }
 849#ifndef NDEBUG
 850 SmallSet<unsigned, 4> DstRegs;
 851#endif
 852 for (unsigned i = 0; i != SubRegs; ++i) {
 853 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
 854 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
 855 assert(Dst && Src && "Bad sub-register");
 856#ifndef NDEBUG
 857 assert(!DstRegs.count(Src) && "destructive vector copy");
 858 DstRegs.insert(Dst);
 859#endif
 860 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
 861 // VORR (NEON or MVE) takes two source operands.
 862 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
 863 Mov.addReg(Src);
 864 }
 865 // MVE VORR takes predicate operands in place of an ordinary condition.
 866 if (Opc == ARM::MVE_VORR)
 868 else
 869 Mov = Mov.add(predOps(ARMCC::AL));
 870 // MOVr can set CC.
 871 if (Opc == ARM::MOVr)
 872 Mov = Mov.add(condCodeOp());
 873 }
 874 // Add implicit super-register defs and kills to the last instruction.
 875 Mov->addRegisterDefined(DestReg, TRI);
 876 if (KillSrc)
 877 Mov->addRegisterKilled(SrcReg, TRI);
 878}
879
 880std::optional<DestSourcePair>
// isCopyInstrImpl: recognize MI as a plain register move and return its
// (destination, source) operand pair, or nullopt otherwise.
// NOTE(review): the signature line (orig. 881) is missing from this listing.
 882 // VMOVRRD is also a copy instruction but it requires
 883 // special way of handling. It is more complex copy version
 884 // and since that we are not considering it. For recognition
 885 // of such instruction isExtractSubregLike MI interface function
 886 // could be used.
 887 // VORRq is considered as a move only if two inputs are
 888 // the same register.
 889 if (!MI.isMoveReg() ||
 890 (MI.getOpcode() == ARM::VORRq &&
 891 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
 892 return std::nullopt;
 893 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
 894}
895
 896std::optional<ParamLoadedValue>
// describeLoadedValue: for call-site parameter debug info, describe the value
// loaded into Reg by MI. Copies whose destination is not exactly Reg are
// rejected (see the width-mismatch discussion below).
// NOTE(review): the signature start (orig. 897) and the trailing delegation
// to the base-class implementation (orig. 923) are missing from this listing.
 898 Register Reg) const {
 899 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
 900 Register DstReg = DstSrcPair->Destination->getReg();
 901
 902 // TODO: We don't handle cases where the forwarding reg is narrower/wider
 903 // than the copy registers. Consider for example:
 904 //
 905 // s16 = VMOVS s0
 906 // s17 = VMOVS s1
 907 // call @callee(d0)
 908 //
 909 // We'd like to describe the call site value of d0 as d8, but this requires
 910 // gathering and merging the descriptions for the two VMOVS instructions.
 911 //
 912 // We also don't handle the reverse situation, where the forwarding reg is
 913 // narrower than the copy destination:
 914 //
 915 // d8 = VMOVD d0
 916 // call @callee(s1)
 917 //
 918 // We need to produce a fragment description (the call site value of s1 is
 919 // /not/ just d8).
 920 if (DstReg != Reg)
 921 return std::nullopt;
 922 }
 924}
925
// AddDReg: append Reg (or one of its sub-registers when SubIdx is non-zero)
// to the builder.
// NOTE(review): the signature start (orig. 925-926) and the condition that
// selects between resolving the physical sub-register and adding a virtual
// register with a subreg index (orig. 933) are missing from this listing.
 927 unsigned Reg,
 928 unsigned SubIdx,
 929 RegState State) const {
 930 if (!SubIdx)
 931 return MIB.addReg(Reg, State);
 932
 934 return MIB.addReg(getRegisterInfo().getSubReg(Reg, SubIdx), State);
 935 return MIB.addReg(Reg, State, SubIdx);
}
937
// storeRegToStackSlot: spill SrcReg of class RC to frame index FI, choosing
// the store instruction by the register class's spill size (2, 4, 8, 16, 24,
// 32 or 64 bytes) and the available subtarget features (NEON vs MVE, stack
// realignment, v5TE STRD availability). Multi-register classes without a
// single store are decomposed via AddDReg into VSTMDIA/STM sequences.
// NOTE(review): this listing is truncated — the signature start (orig.
// 938-939), the MMO construction (orig. 947-950) and many builder predicate
// tails (e.g. orig. 961, 972, 979, 986, 993, 1004, 1011, 1018, ...) are
// missing.
 940 Register SrcReg, bool isKill, int FI,
 941 const TargetRegisterClass *RC,
 942 Register VReg,
 943 MachineInstr::MIFlag Flags) const {
 944 MachineFunction &MF = *MBB.getParent();
 945 MachineFrameInfo &MFI = MF.getFrameInfo();
 946 Align Alignment = MFI.getObjectAlign(FI);
 948
 951 MFI.getObjectSize(FI), Alignment);
 952
 953 switch (TRI.getSpillSize(*RC)) {
 954 case 2:
 955 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
 956 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
 957 .addReg(SrcReg, getKillRegState(isKill))
 958 .addFrameIndex(FI)
 959 .addImm(0)
 960 .addMemOperand(MMO)
 962 } else
 963 llvm_unreachable("Unknown reg class!");
 964 break;
 965 case 4:
 966 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
 967 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
 968 .addReg(SrcReg, getKillRegState(isKill))
 969 .addFrameIndex(FI)
 970 .addImm(0)
 971 .addMemOperand(MMO)
 973 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
 974 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
 975 .addReg(SrcReg, getKillRegState(isKill))
 976 .addFrameIndex(FI)
 977 .addImm(0)
 978 .addMemOperand(MMO)
 980 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
 981 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
 982 .addReg(SrcReg, getKillRegState(isKill))
 983 .addFrameIndex(FI)
 984 .addImm(0)
 985 .addMemOperand(MMO)
 987 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
 988 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_FPSCR_NZCVQC_off))
 989 .addReg(SrcReg, getKillRegState(isKill))
 990 .addFrameIndex(FI)
 991 .addImm(0)
 992 .addMemOperand(MMO)
 994 } else
 995 llvm_unreachable("Unknown reg class!");
 996 break;
 997 case 8:
 998 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
 999 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
 1000 .addReg(SrcReg, getKillRegState(isKill))
 1001 .addFrameIndex(FI)
 1002 .addImm(0)
 1003 .addMemOperand(MMO)
 1005 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
// GPR pairs: STRD needs v5TE; otherwise fall back to STMIA.
 1006 if (Subtarget.hasV5TEOps()) {
 1007 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
 1008 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
 1009 AddDReg(MIB, SrcReg, ARM::gsub_1, {});
 1010 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
 1012 } else {
 1013 // Fallback to STM instruction, which has existed since the dawn of
 1014 // time.
 1015 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
 1016 .addFrameIndex(FI)
 1017 .addMemOperand(MMO)
 1019 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
 1020 AddDReg(MIB, SrcReg, ARM::gsub_1, {});
 1021 }
 1022 } else
 1023 llvm_unreachable("Unknown reg class!");
 1024 break;
 1025 case 16:
 1026 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
 1027 // Use aligned spills if the stack can be realigned.
 1028 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
 1029 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
 1030 .addFrameIndex(FI)
 1031 .addImm(16)
 1032 .addReg(SrcReg, getKillRegState(isKill))
 1033 .addMemOperand(MMO)
 1035 } else {
 1036 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
 1037 .addReg(SrcReg, getKillRegState(isKill))
 1038 .addFrameIndex(FI)
 1039 .addMemOperand(MMO)
 1041 }
 1042 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
 1043 Subtarget.hasMVEIntegerOps()) {
 1044 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
 1045 MIB.addReg(SrcReg, getKillRegState(isKill))
 1046 .addFrameIndex(FI)
 1047 .addImm(0)
 1048 .addMemOperand(MMO);
 1050 } else
 1051 llvm_unreachable("Unknown reg class!");
 1052 break;
 1053 case 24:
 1054 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
 1055 // Use aligned spills if the stack can be realigned.
 1056 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
 1057 Subtarget.hasNEON()) {
 1058 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
 1059 .addFrameIndex(FI)
 1060 .addImm(16)
 1061 .addReg(SrcReg, getKillRegState(isKill))
 1062 .addMemOperand(MMO)
 1064 } else {
 1066 get(ARM::VSTMDIA))
 1067 .addFrameIndex(FI)
 1069 .addMemOperand(MMO);
 1070 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
 1071 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
 1072 AddDReg(MIB, SrcReg, ARM::dsub_2, {});
 1073 }
 1074 } else
 1075 llvm_unreachable("Unknown reg class!");
 1076 break;
 1077 case 32:
 1078 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
 1079 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
 1080 ARM::DQuadRegClass.hasSubClassEq(RC)) {
 1081 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
 1082 Subtarget.hasNEON()) {
 1083 // FIXME: It's possible to only store part of the QQ register if the
 1084 // spilled def has a sub-register index.
 1085 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
 1086 .addFrameIndex(FI)
 1087 .addImm(16)
 1088 .addReg(SrcReg, getKillRegState(isKill))
 1089 .addMemOperand(MMO)
 1091 } else if (Subtarget.hasMVEIntegerOps()) {
 1092 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
 1093 .addReg(SrcReg, getKillRegState(isKill))
 1094 .addFrameIndex(FI)
 1095 .addMemOperand(MMO);
 1096 } else {
 1098 get(ARM::VSTMDIA))
 1099 .addFrameIndex(FI)
 1101 .addMemOperand(MMO);
 1102 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
 1103 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
 1104 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, {});
 1105 AddDReg(MIB, SrcReg, ARM::dsub_3, {});
 1106 }
 1107 } else
 1108 llvm_unreachable("Unknown reg class!");
 1109 break;
 1110 case 64:
 1111 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
 1112 Subtarget.hasMVEIntegerOps()) {
 1113 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
 1114 .addReg(SrcReg, getKillRegState(isKill))
 1115 .addFrameIndex(FI)
 1116 .addMemOperand(MMO);
 1117 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
 1118 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
 1119 .addFrameIndex(FI)
 1121 .addMemOperand(MMO);
 1122 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
 1123 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
 1124 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, {});
 1125 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, {});
 1126 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, {});
 1127 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, {});
 1128 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, {});
 1129 AddDReg(MIB, SrcReg, ARM::dsub_7, {});
 1130 } else
 1131 llvm_unreachable("Unknown reg class!");
 1132 break;
 1133 default:
 1134 llvm_unreachable("Unknown reg class!");
 1135 }
 1136}
1137
// isStoreToStackSlot: if MI is a direct, unindexed store of a register into a
// stack slot, set FrameIndex to that slot and return the stored register;
// otherwise return 0 (no register).
// NOTE(review): this rendering elides the signature line (original line 1138);
// the left-hand numbers are source line numbers from the original file.
1139 int &FrameIndex) const {
1140 switch (MI.getOpcode()) {
1141 default: break;
1142 case ARM::STRrs:
1143 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
// Register-offset store forms: only a plain frame store when the index
// register is absent (reg 0) and the shift/offset immediate is zero.
1144 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1145 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1146 MI.getOperand(3).getImm() == 0) {
1147 FrameIndex = MI.getOperand(1).getIndex();
1148 return MI.getOperand(0).getReg();
1149 }
1150 break;
// Immediate-offset store forms: a frame store only when the offset is zero.
1151 case ARM::STRi12:
1152 case ARM::t2STRi12:
1153 case ARM::tSTRspi:
1154 case ARM::VSTRD:
1155 case ARM::VSTRS:
1156 case ARM::VSTRH:
1157 case ARM::VSTR_P0_off:
1158 case ARM::VSTR_FPSCR_NZCVQC_off:
1159 case ARM::MVE_VSTRWU32:
1160 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1161 MI.getOperand(2).getImm() == 0) {
1162 FrameIndex = MI.getOperand(1).getIndex();
1163 return MI.getOperand(0).getReg();
1164 }
1165 break;
// NEON stores: here the frame index is operand 0 and the stored value is
// operand 2; reject sub-register stores (only a full-register spill counts).
1166 case ARM::VST1q64:
1167 case ARM::VST1d64TPseudo:
1168 case ARM::VST1d64QPseudo:
1169 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1170 FrameIndex = MI.getOperand(0).getIndex();
1171 return MI.getOperand(2).getReg();
1172 }
1173 break;
1174 case ARM::VSTMQIA:
1175 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1176 FrameIndex = MI.getOperand(1).getIndex();
1177 return MI.getOperand(0).getReg();
1178 }
1179 break;
// MVE multi-Q spill pseudos: operand 0 is the stored register, operand 1 the
// frame index.
1180 case ARM::MQQPRStore:
1181 case ARM::MQQQQPRStore:
1182 if (MI.getOperand(1).isFI()) {
1183 FrameIndex = MI.getOperand(1).getIndex();
1184 return MI.getOperand(0).getReg();
1185 }
1186 break;
1187 }
1188
// Not a recognized direct store to a stack slot.
1189 return 0;
1190}
1191
// hasStoreToStackSlot (bool overload): memory-operand based fallback. Returns
// true only when the generic hasStoreToStackSlot query reports exactly one
// frame-object access, in which case the frame index is extracted from the
// access's FixedStackPseudoSourceValue.
// NOTE(review): the signature line and the local 'Accesses' declaration are
// elided in this rendering (original lines 1192/1194).
1193 int &FrameIndex) const {
1195 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1196 Accesses.size() == 1) {
1197 FrameIndex =
1198 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1199 ->getFrameIndex();
1200 return true;
1201 }
1202 return false;
1203}
1204
// loadRegFromStackSlot: emit the reload of DestReg from frame index FI,
// selecting the load opcode by the register class's spill size (switch below).
// Mirrors storeRegToStackSlot above.
// NOTE(review): this rendering elides the signature lines (1205-1206) and the
// trailing predicate-operand lines of several BuildMI chains — presumably
// .add(predOps(ARMCC::AL)); confirm against the original source.
1207 Register DestReg, int FI,
1208 const TargetRegisterClass *RC,
1209 Register VReg, unsigned SubReg,
1210 MachineInstr::MIFlag Flags) const {
// Use the debug location of the instruction we insert before, if any.
1211 DebugLoc DL;
1212 if (I != MBB.end()) DL = I->getDebugLoc();
1213 MachineFunction &MF = *MBB.getParent();
1214 MachineFrameInfo &MFI = MF.getFrameInfo();
1215 const Align Alignment = MFI.getObjectAlign(FI);
1218 MFI.getObjectSize(FI), Alignment);
1219
// Dispatch on spill size in bytes (2/4/8/16/24/32/64).
1221 switch (TRI.getSpillSize(*RC)) {
1222 case 2:
1223 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1224 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1225 .addFrameIndex(FI)
1226 .addImm(0)
1227 .addMemOperand(MMO)
1229 } else
1230 llvm_unreachable("Unknown reg class!");
1231 break;
1232 case 4:
1233 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1234 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1235 .addFrameIndex(FI)
1236 .addImm(0)
1237 .addMemOperand(MMO)
1239 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1240 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1241 .addFrameIndex(FI)
1242 .addImm(0)
1243 .addMemOperand(MMO)
1245 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1246 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1247 .addFrameIndex(FI)
1248 .addImm(0)
1249 .addMemOperand(MMO)
1251 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
1252 BuildMI(MBB, I, DL, get(ARM::VLDR_FPSCR_NZCVQC_off), DestReg)
1253 .addFrameIndex(FI)
1254 .addImm(0)
1255 .addMemOperand(MMO)
1257 } else
1258 llvm_unreachable("Unknown reg class!");
1259 break;
1260 case 8:
1261 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1262 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1263 .addFrameIndex(FI)
1264 .addImm(0)
1265 .addMemOperand(MMO)
1267 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1269
// GPR pair: prefer LDRD when the subtarget has it (v5TE+); otherwise fall
// back to an LDMIA of the two gsub halves.
1270 if (Subtarget.hasV5TEOps()) {
1271 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1272 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
1273 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
1274 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1276 } else {
1277 // Fallback to LDM instruction, which has existed since the dawn of
1278 // time.
1279 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1280 .addFrameIndex(FI)
1281 .addMemOperand(MMO)
1283 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
1284 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
1285 }
1286
// The sub-register defs don't fully define a physical pair register; add an
// implicit def of the whole register so liveness is correct.
1287 if (DestReg.isPhysical())
1288 MIB.addReg(DestReg, RegState::ImplicitDefine);
1289 } else
1290 llvm_unreachable("Unknown reg class!");
1291 break;
1292 case 16:
1293 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
// Use the aligned VLD1 form only when the slot is 16-byte aligned and the
// stack can be realigned; otherwise use VLDM which has no alignment demand.
1294 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1295 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1296 .addFrameIndex(FI)
1297 .addImm(16)
1298 .addMemOperand(MMO)
1300 } else {
1301 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1302 .addFrameIndex(FI)
1303 .addMemOperand(MMO)
1305 }
1306 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1307 Subtarget.hasMVEIntegerOps()) {
1308 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1309 MIB.addFrameIndex(FI)
1310 .addImm(0)
1311 .addMemOperand(MMO);
1313 } else
1314 llvm_unreachable("Unknown reg class!");
1315 break;
1316 case 24:
1317 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1318 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1319 Subtarget.hasNEON()) {
1320 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1321 .addFrameIndex(FI)
1322 .addImm(16)
1323 .addMemOperand(MMO)
1325 } else {
// Reload the triple as three D sub-registers via VLDMDIA.
1326 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1327 .addFrameIndex(FI)
1328 .addMemOperand(MMO)
1330 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1331 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1332 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1333 if (DestReg.isPhysical())
1334 MIB.addReg(DestReg, RegState::ImplicitDefine);
1335 }
1336 } else
1337 llvm_unreachable("Unknown reg class!");
1338 break;
1339 case 32:
1340 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1341 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1342 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1343 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1344 Subtarget.hasNEON()) {
1345 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1346 .addFrameIndex(FI)
1347 .addImm(16)
1348 .addMemOperand(MMO)
1350 } else if (Subtarget.hasMVEIntegerOps()) {
1351 BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1352 .addFrameIndex(FI)
1353 .addMemOperand(MMO);
1354 } else {
// Generic path: reload four D sub-registers with a single VLDMDIA.
1355 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1356 .addFrameIndex(FI)
1358 .addMemOperand(MMO);
1359 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1360 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1361 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1362 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
1363 if (DestReg.isPhysical())
1364 MIB.addReg(DestReg, RegState::ImplicitDefine);
1365 }
1366 } else
1367 llvm_unreachable("Unknown reg class!");
1368 break;
1369 case 64:
1370 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1371 Subtarget.hasMVEIntegerOps()) {
1372 BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1373 .addFrameIndex(FI)
1374 .addMemOperand(MMO);
1375 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
// Eight consecutive D registers loaded by one VLDMDIA.
1376 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1377 .addFrameIndex(FI)
1379 .addMemOperand(MMO);
1380 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1381 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1382 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1383 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
1384 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead);
1385 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead);
1386 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead);
1387 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead);
1388 if (DestReg.isPhysical())
1389 MIB.addReg(DestReg, RegState::ImplicitDefine);
1390 } else
1391 llvm_unreachable("Unknown reg class!");
1392 break;
1393 default:
1394 llvm_unreachable("Unknown regclass!");
1395 }
1396}
1397
// isLoadFromStackSlot: if MI is a direct, unindexed load of a register from a
// stack slot, set FrameIndex to that slot and return the destination
// register; otherwise return 0. Mirrors isStoreToStackSlot above.
// NOTE(review): the signature line (original 1398) is elided in this
// rendering.
1399 int &FrameIndex) const {
1400 switch (MI.getOpcode()) {
1401 default: break;
1402 case ARM::LDRrs:
1403 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
// Register-offset load forms: only a frame access when the index register is
// absent (reg 0) and the shift/offset immediate is zero.
1404 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1405 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1406 MI.getOperand(3).getImm() == 0) {
1407 FrameIndex = MI.getOperand(1).getIndex();
1408 return MI.getOperand(0).getReg();
1409 }
1410 break;
// Immediate-offset load forms: a frame load only when the offset is zero.
1411 case ARM::LDRi12:
1412 case ARM::t2LDRi12:
1413 case ARM::tLDRspi:
1414 case ARM::VLDRD:
1415 case ARM::VLDRS:
1416 case ARM::VLDRH:
1417 case ARM::VLDR_P0_off:
1418 case ARM::VLDR_FPSCR_NZCVQC_off:
1419 case ARM::MVE_VLDRWU32:
1420 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1421 MI.getOperand(2).getImm() == 0) {
1422 FrameIndex = MI.getOperand(1).getIndex();
1423 return MI.getOperand(0).getReg();
1424 }
1425 break;
// NEON VLD1 forms and pseudos: reject sub-register loads — only a
// full-register reload counts as a stack-slot load.
1426 case ARM::VLD1q64:
1427 case ARM::VLD1d8TPseudo:
1428 case ARM::VLD1d16TPseudo:
1429 case ARM::VLD1d32TPseudo:
1430 case ARM::VLD1d64TPseudo:
1431 case ARM::VLD1d8QPseudo:
1432 case ARM::VLD1d16QPseudo:
1433 case ARM::VLD1d32QPseudo:
1434 case ARM::VLD1d64QPseudo:
1435 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1436 FrameIndex = MI.getOperand(1).getIndex();
1437 return MI.getOperand(0).getReg();
1438 }
1439 break;
1440 case ARM::VLDMQIA:
1441 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1442 FrameIndex = MI.getOperand(1).getIndex();
1443 return MI.getOperand(0).getReg();
1444 }
1445 break;
// MVE multi-Q reload pseudos.
1446 case ARM::MQQPRLoad:
1447 case ARM::MQQQQPRLoad:
1448 if (MI.getOperand(1).isFI()) {
1449 FrameIndex = MI.getOperand(1).getIndex();
1450 return MI.getOperand(0).getReg();
1451 }
1452 break;
1453 }
1454
// Not a recognized direct load from a stack slot.
1455 return 0;
1456}
1457
// hasLoadFromStackSlot (bool overload): memory-operand based fallback,
// symmetric with hasStoreToStackSlot above. True only when the generic query
// reports exactly one frame-object access.
// NOTE(review): the signature line and the local 'Accesses' declaration are
// elided in this rendering (original lines 1458/1460).
1459 int &FrameIndex) const {
1461 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1462 Accesses.size() == 1) {
1463 FrameIndex =
1464 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1465 ->getFrameIndex();
1466 return true;
1467 }
1468 return false;
1469}
1470
1471/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
1472/// depending on whether the result is used.
1473void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1474 bool isThumb1 = Subtarget.isThumb1Only();
1475 bool isThumb2 = Subtarget.isThumb2();
1476 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1477
1478 DebugLoc dl = MI->getDebugLoc();
1479 MachineBasicBlock *BB = MI->getParent();
1480
// Pick the load side: use the writeback (_UPD) form when the updated base
// pointer (operand 1) is live-out, or always on Thumb1 which has no
// non-writeback LDM of this form here.
1481 MachineInstrBuilder LDM, STM;
1482 if (isThumb1 || !MI->getOperand(1).isDead()) {
1483 MachineOperand LDWb(MI->getOperand(1));
1484 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1485 : isThumb1 ? ARM::tLDMIA_UPD
1486 : ARM::LDMIA_UPD))
1487 .add(LDWb);
1488 } else {
1489 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1490 }
1491
// Same choice for the store side, keyed off the store base (operand 0).
1492 if (isThumb1 || !MI->getOperand(0).isDead()) {
1493 MachineOperand STWb(MI->getOperand(0));
1494 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1495 : isThumb1 ? ARM::tSTMIA_UPD
1496 : ARM::STMIA_UPD))
1497 .add(STWb);
1498 } else {
1499 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1500 }
1501
// Add the base-address operands and AL predicates.
1502 MachineOperand LDBase(MI->getOperand(3));
1503 LDM.add(LDBase).add(predOps(ARMCC::AL));
1504
1505 MachineOperand STBase(MI->getOperand(2));
1506 STM.add(STBase).add(predOps(ARMCC::AL));
1507
1508 // Sort the scratch registers into ascending order.
// LDM/STM register lists are ordered by encoding, hence the sort key.
1509 const TargetRegisterInfo &TRI = getRegisterInfo();
1510 SmallVector<unsigned, 6> ScratchRegs;
1511 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1512 ScratchRegs.push_back(MO.getReg());
1513 llvm::sort(ScratchRegs,
1514 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1515 return TRI.getEncodingValue(Reg1) <
1516 TRI.getEncodingValue(Reg2);
1517 });
1518
// NOTE(review): the loop body (original lines 1520-1521) is elided in this
// rendering — presumably it appends each scratch register to both LDM and
// STM register lists; confirm against the original source.
1519 for (const auto &Reg : ScratchRegs) {
1522 }
1523
// The MEMCPY pseudo is fully expanded; remove it.
1524 BB->erase(MI);
1525}
1526
// expandPostRAPseudo: post-RA pseudo expansion hook. Handles
// LOAD_STACK_GUARD and MEMCPY pseudos, and opportunistically widens
// S-register COPYs to VMOVD. Returns true if MI was expanded/changed.
// NOTE(review): the signature line (original 1527) is elided in this
// rendering.
1528 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1529 expandLoadStackGuard(MI);
1530 MI.getParent()->erase(MI);
1531 return true;
1532 }
1533
1534 if (MI.getOpcode() == ARM::MEMCPY) {
1535 expandMEMCPY(MI);
1536 return true;
1537 }
1538
1539 // This hook gets to expand COPY instructions before they become
1540 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1541 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1542 // changed into a VORR that can go down the NEON pipeline.
1543 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1544 return false;
1545
1546 // Look for a copy between even S-registers. That is where we keep floats
1547 // when using NEON v2f32 instructions for f32 arithmetic.
1548 Register DstRegS = MI.getOperand(0).getReg();
1549 Register SrcRegS = MI.getOperand(1).getReg();
1550 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1551 return false;
1552
// Map each S register to the D register containing it as ssub_0; this only
// succeeds for even-numbered S registers.
1554 MCRegister DstRegD =
1555 TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, &ARM::DPRRegClass);
1556 MCRegister SrcRegD =
1557 TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, &ARM::DPRRegClass);
1558 if (!DstRegD || !SrcRegD)
1559 return false;
1560
1561 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1562 // legal if the COPY already defines the full DstRegD, and it isn't a
1563 // sub-register insertion.
1564 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1565 return false;
1566
1567 // A dead copy shouldn't show up here, but reject it just in case.
1568 if (MI.getOperand(0).isDead())
1569 return false;
1570
1571 // All clear, widen the COPY.
1572 LLVM_DEBUG(dbgs() << "widening: " << MI);
1573 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1574
1575 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1576 // or some other super-register.
1577 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1578 if (ImpDefIdx != -1)
1579 MI.removeOperand(ImpDefIdx);
1580
1581 // Change the opcode and operands.
1582 MI.setDesc(get(ARM::VMOVD));
1583 MI.getOperand(0).setReg(DstRegD);
1584 MI.getOperand(1).setReg(SrcRegD);
1585 MIB.add(predOps(ARMCC::AL));
1586
1587 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1588 // register scavenger and machine verifier, so we need to indicate that we
1589 // are reading an undefined value from SrcRegD, but a proper value from
1590 // SrcRegS.
1591 MI.getOperand(1).setIsUndef();
1592 MIB.addReg(SrcRegS, RegState::Implicit);
1593
1594 // SrcRegD may actually contain an unrelated value in the ssub_1
1595 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1596 if (MI.getOperand(1).isKill()) {
1597 MI.getOperand(1).setIsKill(false);
1598 MI.addRegisterKilled(SrcRegS, TRI, true);
1599 }
1600
1601 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1602 return true;
1603}
1604
1605/// Create a copy of a const pool value. Update CPI to the new index and return
1606/// the label UID.
// NOTE(review): local declarations at original lines 1608-1609 (presumably
// the MachineConstantPool* MCP and ARMFunctionInfo* AFI used below) are
// elided in this rendering, as are parts of several Create(...) call chains.
1607static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1610
1611 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1612 assert(MCPE.isMachineConstantPoolEntry() &&
1613 "Expecting a machine constantpool entry!");
1614 ARMConstantPoolValue *ACPV =
1615 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1616
// Each clone gets a fresh PIC label UID so the duplicated load relocates
// independently of the original.
1617 unsigned PCLabelId = AFI->createPICLabelUId();
1618 ARMConstantPoolValue *NewCPV = nullptr;
1619
1620 // FIXME: The below assumes PIC relocation model and that the function
1621 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1622 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1623 // instructions, so that's probably OK, but is PIC always correct when
1624 // we get here?
// Rebuild an equivalent constant-pool value for every supported kind.
1625 if (ACPV->isGlobalValue())
1627 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1628 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1629 else if (ACPV->isExtSymbol())
1632 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1633 else if (ACPV->isBlockAddress())
1635 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1637 else if (ACPV->isLSDA())
1638 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1639 ARMCP::CPLSDA, 4);
1640 else if (ACPV->isMachineBasicBlock())
1641 NewCPV = ARMConstantPoolMBB::
1643 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1644 else
1645 llvm_unreachable("Unexpected ARM constantpool value type!!");
// Hand the caller the new pool index (out parameter) and the new label UID.
1646 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1647 return PCLabelId;
1648}
1649
// reMaterialize: clone Orig before I in MBB, redefining DestReg:SubIdx.
// PIC constant-pool loads are special-cased: they need a private copy of the
// constant-pool value with a fresh PIC label (see duplicateCPV above).
// NOTE(review): the signature lines (original 1650-1651) are elided in this
// rendering.
1652 Register DestReg, unsigned SubIdx,
1653 const MachineInstr &Orig,
1654 LaneBitmask UsedLanes) const {
1655 unsigned Opcode = Orig.getOpcode();
1656 switch (Opcode) {
1657 default: {
// Generic path: plain clone with the destination register substituted.
1658 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1659 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1660 MBB.insert(I, MI);
1661 break;
1662 }
1663 case ARM::tLDRpci_pic:
1664 case ARM::t2LDRpci_pic: {
// PIC CP load: duplicate the constant-pool entry (CPI updated in place) and
// rebuild the instruction with the new PIC label id.
1665 MachineFunction &MF = *MBB.getParent();
1666 unsigned CPI = Orig.getOperand(1).getIndex();
1667 unsigned PCLabelId = duplicateCPV(MF, CPI);
1668 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1670 .addImm(PCLabelId)
1671 .cloneMemRefs(Orig);
1672 break;
1673 }
1674 }
1675}
1676
// duplicate: clone Orig (including any bundled successors) via the generic
// TargetInstrInfo::duplicate, then walk the resulting bundle and give every
// PIC constant-pool load its own duplicated CP entry and PIC label.
// NOTE(review): the signature lines (original 1677-1678) and the iterator
// declaration at original line 1682 are elided in this rendering.
1681 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1683 for (;;) {
1684 switch (I->getOpcode()) {
1685 case ARM::tLDRpci_pic:
1686 case ARM::t2LDRpci_pic: {
1687 MachineFunction &MF = *MBB.getParent();
1688 unsigned CPI = I->getOperand(1).getIndex();
1689 unsigned PCLabelId = duplicateCPV(MF, CPI);
// Patch the clone in place: new pool index and new PIC label id.
1690 I->getOperand(1).setIndex(CPI);
1691 I->getOperand(2).setImm(PCLabelId);
1692 break;
1693 }
1694 }
// Continue through the bundle; stop at the last bundled instruction.
1695 if (!I->isBundledWithSucc())
1696 break;
1697 ++I;
1698 }
1699 return Cloned;
1700}
1701
// produceSameValue: return true if MI0 and MI1 are guaranteed to define the
// same value — used for CSE/rematerialization. Special-cases PC-relative
// loads (constant-pool / global-address forms) whose PIC label operands
// differ even when the loaded value is identical, and PICLDR.
// NOTE(review): the signature line (original 1702) and the final fallthrough
// return (original 1778) are elided in this rendering.
1703 const MachineInstr &MI1,
1704 const MachineRegisterInfo *MRI) const {
1705 unsigned Opcode = MI0.getOpcode();
1706 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1707 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1708 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1709 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1710 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1711 Opcode == ARM::t2MOV_ga_pcrel) {
1712 if (MI1.getOpcode() != Opcode)
1713 return false;
1714 if (MI0.getNumOperands() != MI1.getNumOperands())
1715 return false;
1716
// Operand 1 holds the constant-pool index or the global address.
1717 const MachineOperand &MO0 = MI0.getOperand(1);
1718 const MachineOperand &MO1 = MI1.getOperand(1);
1719 if (MO0.getOffset() != MO1.getOffset())
1720 return false;
1721
1722 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1723 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1724 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1725 Opcode == ARM::t2MOV_ga_pcrel)
1726 // Ignore the PC labels.
1727 return MO0.getGlobal() == MO1.getGlobal();
1728
// Constant-pool forms: compare the pool entries themselves. ARM-specific
// entries compare via hasSameValue; plain entries compare by Constant*.
1729 const MachineFunction *MF = MI0.getParent()->getParent();
1730 const MachineConstantPool *MCP = MF->getConstantPool();
1731 int CPI0 = MO0.getIndex();
1732 int CPI1 = MO1.getIndex();
1733 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1734 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1735 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1736 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1737 if (isARMCP0 && isARMCP1) {
1738 ARMConstantPoolValue *ACPV0 =
1739 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1740 ARMConstantPoolValue *ACPV1 =
1741 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1742 return ACPV0->hasSameValue(ACPV1);
1743 } else if (!isARMCP0 && !isARMCP1) {
1744 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1745 }
1746 return false;
1747 } else if (Opcode == ARM::PICLDR) {
1748 if (MI1.getOpcode() != Opcode)
1749 return false;
1750 if (MI0.getNumOperands() != MI1.getNumOperands())
1751 return false;
1752
// PICLDR: addresses may differ syntactically yet load the same value;
// recurse on the defining instructions (requires SSA / virtual registers).
1753 Register Addr0 = MI0.getOperand(1).getReg();
1754 Register Addr1 = MI1.getOperand(1).getReg();
1755 if (Addr0 != Addr1) {
1756 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1757 return false;
1758
1759 // This assumes SSA form.
1760 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1761 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1762 // Check if the loaded value, e.g. a constantpool of a global address, are
1763 // the same.
1764 if (!produceSameValue(*Def0, *Def1, MRI))
1765 return false;
1766 }
1767
// Remaining operands (offset, predicate, ...) must match exactly.
1768 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1769 // %12 = PICLDR %11, 0, 14, %noreg
1770 const MachineOperand &MO0 = MI0.getOperand(i);
1771 const MachineOperand &MO1 = MI1.getOperand(i);
1772 if (!MO0.isIdenticalTo(MO1))
1773 return false;
1774 }
1775 return true;
1776 }
1777
1779}
1780
1781/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1782/// determine if two loads are loading from the same base address. It should
1783/// only return true if the base pointers are the same and the only differences
1784/// between the two addresses is the offset. It also returns the offsets by
1785/// reference.
1786///
1787/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1788/// is permanently disabled.
// NOTE(review): the signature line (original 1789) is elided in this
// rendering.
1790 int64_t &Offset1,
1791 int64_t &Offset2) const {
1792 // Don't worry about Thumb: just ARM and Thumb2.
1793 if (Subtarget.isThumb1Only()) return false;
1794
1795 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1796 return false;
1797
// Whitelist of simple load opcodes this analysis understands.
1798 auto IsLoadOpcode = [&](unsigned Opcode) {
1799 switch (Opcode) {
1800 default:
1801 return false;
1802 case ARM::LDRi12:
1803 case ARM::LDRBi12:
1804 case ARM::LDRD:
1805 case ARM::LDRH:
1806 case ARM::LDRSB:
1807 case ARM::LDRSH:
1808 case ARM::VLDRD:
1809 case ARM::VLDRS:
1810 case ARM::t2LDRi8:
1811 case ARM::t2LDRBi8:
1812 case ARM::t2LDRDi8:
1813 case ARM::t2LDRSHi8:
1814 case ARM::t2LDRi12:
1815 case ARM::t2LDRBi12:
1816 case ARM::t2LDRSHi12:
1817 return true;
1818 }
1819 };
1820
1821 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1822 !IsLoadOpcode(Load2->getMachineOpcode()))
1823 return false;
1824
1825 // Check if base addresses and chain operands match.
1826 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1827 Load1->getOperand(4) != Load2->getOperand(4))
1828 return false;
1829
1830 // Index should be Reg0.
1831 if (Load1->getOperand(3) != Load2->getOperand(3))
1832 return false;
1833
1834 // Determine the offsets.
// Only constant offsets qualify; report them sign-extended via the out
// parameters.
1835 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1836 isa<ConstantSDNode>(Load2->getOperand(1))) {
1837 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1838 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1839 return true;
1840 }
1841
1842 return false;
1843}
1844
1845/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
1846/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1847/// be scheduled together. On some targets if two loads are loading from
1848/// addresses in the same cache line, it's better if they are scheduled
1849/// together. This function takes two integers that represent the load offsets
1850/// from the common base address. It returns true if it decides it's desirable
1851/// to schedule the two loads together. "NumLoads" is the number of loads that
1852/// have already been scheduled after Load1.
1853///
1854/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1855/// is permanently disabled.
// NOTE(review): the signature line (original 1856) is elided in this
// rendering.
1857 int64_t Offset1, int64_t Offset2,
1858 unsigned NumLoads) const {
1859 // Don't worry about Thumb: just ARM and Thumb2.
1860 if (Subtarget.isThumb1Only()) return false;
1861
// Callers must present the loads in offset order.
1862 assert(Offset2 > Offset1);
1863
// Reject loads that are too far apart to plausibly share locality.
1864 if ((Offset2 - Offset1) / 8 > 64)
1865 return false;
1866
1867 // Check if the machine opcodes are different. If they are different
1868 // then we consider them to not be of the same base address,
1869 // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
1870 // In this case, they are considered to be the same because they are different
1871 // encoding forms of the same basic instruction.
1872 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1873 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1874 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1875 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1876 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1877 return false; // FIXME: overly conservative?
1878
1879 // Four loads in a row should be sufficient.
1880 if (NumLoads >= 3)
1881 return false;
1882
1883 return true;
1884}
1885
// isSchedulingBoundary: return true if MI must not be moved across by the
// scheduler (terminators, labels, INLINEASM_BR, SEH pseudos, instructions
// immediately preceding a t2IT, and non-call SP writers).
// NOTE(review): the signature line (original 1886) and the iterator
// declaration at original line 1915 are elided in this rendering.
1887 const MachineBasicBlock *MBB,
1888 const MachineFunction &MF) const {
1889 // Debug info is never a scheduling boundary. It's necessary to be explicit
1890 // due to the special treatment of IT instructions below, otherwise a
1891 // dbg_value followed by an IT will result in the IT instruction being
1892 // considered a scheduling hazard, which is wrong. It should be the actual
1893 // instruction preceding the dbg_value instruction(s), just like it is
1894 // when debug info is not present.
1895 if (MI.isDebugInstr())
1896 return false;
1897
1898 // Terminators and labels can't be scheduled around.
1899 if (MI.isTerminator() || MI.isPosition())
1900 return true;
1901
1902 // INLINEASM_BR can jump to another block
1903 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
1904 return true;
1905
1906 if (isSEHInstruction(MI))
1907 return true;
1908
1909 // Treat the start of the IT block as a scheduling boundary, but schedule
1910 // t2IT along with all instructions following it.
1911 // FIXME: This is a big hammer. But the alternative is to add all potential
1912 // true and anti dependencies to IT block instructions as implicit operands
1913 // to the t2IT instruction. The added compile time and complexity does not
1914 // seem worth it.
1916 // Make sure to skip any debug instructions
1917 while (++I != MBB->end() && I->isDebugInstr())
1918 ;
1919 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1920 return true;
1921
1922 // Don't attempt to schedule around any instruction that defines
1923 // a stack-oriented pointer, as it's unlikely to be profitable. This
1924 // saves compile time, because it doesn't require every single
1925 // stack slot reference to depend on the instruction that does the
1926 // modification.
1927 // Calls don't actually change the stack pointer, even if they have imp-defs.
1928 // No ARM calling conventions change the stack pointer. (X86 calling
1929 // conventions sometimes do).
1930 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
1931 return true;
1932
1933 return false;
1934}
1935
// isProfitableToIfCvt (single-block / triangle form): decide whether
// predicating MBB beats branching. At -Oz, declines if the preceding t2Bcc
// could instead become a CB(N)Z (shorter); otherwise delegates to the
// two-block overload below with an empty false-side.
// NOTE(review): the signature lines (original 1936-1937) are elided in this
// rendering.
1938 unsigned NumCycles, unsigned ExtraPredCycles,
1939 BranchProbability Probability) const {
1940 if (!NumCycles)
1941 return false;
1942
1943 // If we are optimizing for size, see if the branch in the predecessor can be
1944 // lowered to cbn?z by the constant island lowering pass, and return false if
1945 // so. This results in a shorter instruction sequence.
1946 if (MBB.getParent()->getFunction().hasOptSize()) {
1947 MachineBasicBlock *Pred = *MBB.pred_begin();
1948 if (!Pred->empty()) {
1949 MachineInstr *LastMI = &*Pred->rbegin();
1950 if (LastMI->getOpcode() == ARM::t2Bcc) {
1952 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
1953 if (CmpMI)
1954 return false;
1955 }
1956 }
1957 }
// Defer to the diamond/triangle cost model with FBB == MBB, FCycles == 0.
1958 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1959 MBB, 0, 0, Probability);
1960}
1961
// isProfitableToIfCvt (two-block form): cost model comparing predicated
// execution (PredCost) against branching (UnpredCost), with both sides
// scaled by ScalingUpFactor to keep probability math in integers.
// NOTE(review): the signature lines (original 1962-1963) are elided in this
// rendering.
1964 unsigned TCycles, unsigned TExtra,
1965 MachineBasicBlock &FBB,
1966 unsigned FCycles, unsigned FExtra,
1967 BranchProbability Probability) const {
1968 if (!TCycles)
1969 return false;
1970
1971 // In thumb code we often end up trading one branch for a IT block, and
1972 // if we are cloning the instruction can increase code size. Prevent
1973 // blocks with multiple predecessors from being ifcvted to prevent this
1974 // cloning.
1975 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
1976 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
1977 return false;
1978 }
1979
1980 // Attempt to estimate the relative costs of predication versus branching.
1981 // Here we scale up each component of UnpredCost to avoid precision issue when
1982 // scaling TCycles/FCycles by Probability.
1983 const unsigned ScalingUpFactor = 1024;
1984
1985 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1986 unsigned UnpredCost;
1987 if (!Subtarget.hasBranchPredictor()) {
1988 // When we don't have a branch predictor it's always cheaper to not take a
1989 // branch than take it, so we have to take that into account.
1990 unsigned NotTakenBranchCost = 1;
1991 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1992 unsigned TUnpredCycles, FUnpredCycles;
1993 if (!FCycles) {
1994 // Triangle: TBB is the fallthrough
1995 TUnpredCycles = TCycles + NotTakenBranchCost;
1996 FUnpredCycles = TakenBranchCost;
1997 } else {
1998 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
1999 TUnpredCycles = TCycles + TakenBranchCost;
2000 FUnpredCycles = FCycles + NotTakenBranchCost;
2001 // The branch at the end of FBB will disappear when it's predicated, so
2002 // discount it from PredCost.
2003 PredCost -= 1 * ScalingUpFactor;
2004 }
2005 // The total cost is the cost of each path scaled by their probabilities
2006 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2007 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2008 UnpredCost = TUnpredCost + FUnpredCost;
2009 // When predicating assume that the first IT can be folded away but later
2010 // ones cost one cycle each
2011 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2012 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2013 }
2014 } else {
// With a branch predictor: expected path cost plus the branch itself plus a
// tenth of the misprediction penalty.
2015 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2016 unsigned FUnpredCost =
2017 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2018 UnpredCost = TUnpredCost + FUnpredCost;
2019 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2020 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2021 }
2022
// Predicate when it is no more expensive than branching.
2023 return PredCost <= UnpredCost;
2024}
2025
// Return the extra code-size cost, in bytes, of predicating NumInsts
// instructions: zero in ARM mode (every predicable instruction already has a
// condition field), one 2-byte IT instruction per group of predicated
// instructions in Thumb2.
// NOTE(review): this extract dropped the hyperlinked signature line (2027,
// the ARMBaseInstrInfo::extraSizeToPredicateInstructions name/first params);
// verify against the upstream source.
2026unsigned
2028 unsigned NumInsts) const {
2029 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2030 // ARM has a condition code field in every predicable instruction, using it
2031 // doesn't change code size.
2032 if (!Subtarget.isThumb2())
2033 return 0;
2034
2035 // It's possible that the size of the IT is restricted to a single block.
// restrictIT limits each IT to one predicated instruction, so each one costs
// a full 2-byte IT; otherwise one IT covers up to 4 instructions.
2036 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2037 return divideCeil(NumInsts, MaxInsts) * 2;
2038}
2039
// Estimate the number of code-size bytes that removing this branch would
// save during if-conversion.
// NOTE(review): the extract dropped the signature line (2041) and the second
// half of the CB(N)Z-formation condition (2046); verify against upstream.
2040unsigned
2042 // If this branch is likely to be folded into the comparison to form a
2043 // CB(N)Z, then removing it won't reduce code size at all, because that will
2044 // just replace the CB(N)Z with a CMP.
2045 if (MI.getOpcode() == ARM::t2Bcc &&
2047 return 0;
2048
2049 unsigned Size = getInstSizeInBytes(MI);
2050
2051 // For Thumb2, all branches are 32-bit instructions during the if conversion
2052 // pass, but may be replaced with 16-bit instructions during size reduction.
2053 // Since the branches considered by if conversion tend to be forward branches
2054 // over small basic blocks, they are very likely to be in range for the
2055 // narrow instructions, so we assume the final code size will be half what it
2056 // currently is.
2057 if (Subtarget.isThumb2())
2058 Size /= 2;
2059
2060 return Size;
2061}
2062
// Target hook: whether undoing predication is profitable on this subtarget.
// Delegates entirely to the subtarget feature flag.
// NOTE(review): the extract dropped the signature line (2064) naming
// ARMBaseInstrInfo::isProfitableToUnpredicate and its first parameter.
2063bool
2065 MachineBasicBlock &FMBB) const {
2066 // Reduce false anti-dependencies to let the target's out-of-order execution
2067 // engine do its thing.
2068 return Subtarget.isProfitableToUnpredicate();
2069}
2070
2071/// getInstrPredicate - If instruction is predicated, returns its predicate
2072/// condition, otherwise returns AL. It also returns the condition code
2073/// register by reference.
// NOTE(review): the extract dropped the signature line (2074,
// llvm::getInstrPredicate's return type / first parameter).
2075 Register &PredReg) {
2076 int PIdx = MI.findFirstPredOperandIdx();
// No predicate operand: report AL with a null predicate register.
2077 if (PIdx == -1) {
2078 PredReg = 0;
2079 return ARMCC::AL;
2080 }
2081
// The CPSR/pred-register operand immediately follows the condition-code
// immediate operand.
2082 PredReg = MI.getOperand(PIdx+1).getReg();
2083 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2084}
2085
// Map an unconditional branch opcode to its conditional counterpart
// (B -> Bcc, tB -> tBcc, t2B -> t2Bcc); any other opcode is a fatal error.
// NOTE(review): the extract dropped the signature line (2086,
// llvm::getMatchingCondBranchOpcode(unsigned Opc)).
2087 if (Opc == ARM::B)
2088 return ARM::Bcc;
2089 if (Opc == ARM::tB)
2090 return ARM::tBcc;
2091 if (Opc == ARM::t2B)
2092 return ARM::t2Bcc;
2093
2094 llvm_unreachable("Unknown unconditional branch opcode!");
2095}
2096
// Commute MOVCC / t2MOVCCr by swapping the value operands and inverting the
// condition; everything else defers to the generic TargetInstrInfo
// implementation.
// NOTE(review): the extract dropped the signature line (2097).
2098 bool NewMI,
2099 unsigned OpIdx1,
2100 unsigned OpIdx2) const {
2101 switch (MI.getOpcode()) {
2102 case ARM::MOVCCr:
2103 case ARM::t2MOVCCr: {
2104 // MOVCC can be commuted by inverting the condition.
2105 Register PredReg;
2106 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2107 // MOVCC AL can't be inverted. Shouldn't happen.
2108 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2109 return nullptr;
2110 MachineInstr *CommutedMI =
2111 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2112 if (!CommutedMI)
2113 return nullptr;
2114 // After swapping the MOVCC operands, also invert the condition.
// NOTE(review): the extract dropped line 2116, which applies the inverted
// condition to the predicate operand (a setImm of the opposite condition).
2115 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2117 return CommutedMI;
2118 }
2119 }
2120 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2121}
2122
2123/// Identify instructions that can be folded into a MOVCC instruction, and
2124/// return the defining instruction.
// Returns nullptr unless Reg is a virtual register with a single non-debug
// use whose unique definition is predicable, has no frame-index/constpool
// operands, no tied operands, no physreg operands, no live defs other than
// the result, and is safe to move (not across stores).
// NOTE(review): the extract dropped the return-type line (2125,
// "MachineInstr *").
2126ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2127 const TargetInstrInfo *TII) const {
2128 if (!Reg.isVirtual())
2129 return nullptr;
2130 if (!MRI.hasOneNonDBGUse(Reg))
2131 return nullptr;
2132 MachineInstr *MI = MRI.getVRegDef(Reg);
2133 if (!MI)
2134 return nullptr;
2135 // Check if MI can be predicated and folded into the MOVCC.
2136 if (!isPredicable(*MI))
2137 return nullptr;
2138 // Check if MI has any non-dead defs or physreg uses. This also detects
2139 // predicated instructions which will be reading CPSR.
2140 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2141 // Reject frame index operands, PEI can't handle the predicated pseudos.
2142 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2143 return nullptr;
2144 if (!MO.isReg())
2145 continue;
2146 // MI can't have any tied operands, that would conflict with predication.
2147 if (MO.isTied())
2148 return nullptr;
2149 if (MO.getReg().isPhysical())
2150 return nullptr;
2151 if (MO.isDef() && !MO.isDead())
2152 return nullptr;
2153 }
2154 bool DontMoveAcrossStores = true;
2155 if (!MI->isSafeToMove(DontMoveAcrossStores))
2156 return nullptr;
2157 return MI;
2158}
2159
// Fold the unique definition of one MOVCC input into the select itself,
// producing a predicated copy of the defining instruction in place of the
// MOVCC. Returns the new instruction, or nullptr if folding is impossible.
// The true operand (operand 2) is tried first; if its def can't be folded,
// the false operand (operand 1) is tried with the condition inverted.
// NOTE(review): the extract dropped the optimizeSelect signature lines
// (2160-2162) and the taken-Invert branch (2198, which adds the opposite
// condition code immediate); verify against upstream.
2163 bool PreferFalse) const {
2164 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2165 "Unknown select instruction");
2166 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2167 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2168 bool Invert = !DefMI;
2169 if (!DefMI)
2170 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2171 if (!DefMI)
2172 return nullptr;
2173
2174 // Find new register class to use.
2175 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2176 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2177 Register DestReg = MI.getOperand(0).getReg();
2178 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2179 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
// The destination must be constrainable to both input classes, since it
// will stand in for either value depending on the predicate.
2180 if (!MRI.constrainRegClass(DestReg, FalseClass))
2181 return nullptr;
2182 if (!MRI.constrainRegClass(DestReg, TrueClass))
2183 return nullptr;
2184
2185 // Create a new predicated version of DefMI.
2186 // Rfalse is the first use.
2187 MachineInstrBuilder NewMI =
2188 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2189
2190 // Copy all the DefMI operands, excluding its (null) predicate.
2191 const MCInstrDesc &DefDesc = DefMI->getDesc();
2192 for (unsigned i = 1, e = DefDesc.getNumOperands();
2193 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2194 NewMI.add(DefMI->getOperand(i));
2195
2196 unsigned CondCode = MI.getOperand(3).getImm();
2197 if (Invert)
2199 else
2200 NewMI.addImm(CondCode);
2201 NewMI.add(MI.getOperand(4));
2202
2203 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2204 if (NewMI->hasOptionalDef())
2205 NewMI.add(condCodeOp());
2206
2207 // The output register value when the predicate is false is an implicit
2208 // register operand tied to the first def.
2209 // The tie makes the register allocator ensure the FalseReg is allocated the
2210 // same register as operand 0.
2211 FalseReg.setImplicit();
2212 NewMI.add(FalseReg);
2213 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2214
2215 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2216 SeenMIs.insert(NewMI);
2217 SeenMIs.erase(DefMI);
2218
2219 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2220 // DefMI would be invalid when transferred inside the loop. Checking for a
2221 // loop is expensive, but at least remove kill flags if they are in different
2222 // BBs.
2223 if (DefMI->getParent() != MI.getParent())
2224 NewMI->clearKillInfo();
2225
2226 // The caller will erase MI, but not DefMI.
2227 DefMI->eraseFromParent();
2228 return NewMI;
2229}
2230
2231/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2232/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2233/// def operand.
2234///
2235/// This will go away once we can teach tblgen how to set the optional CPSR def
2236/// operand itself.
// NOTE(review): the extract dropped the hyperlinked struct definition and
// array declaration lines (2237-2242, the AddSubFlagsOpcodePair struct and
// "static const ... AddSubFlagsOpcodeMap[] = {"); verify against upstream.
// Entries are {PseudoOpc, MachineOpc}, grouped by base opcode and ISA
// (ARM, Thumb1, Thumb2).
2241
2243 {ARM::ADDSri, ARM::ADDri},
2244 {ARM::ADDSrr, ARM::ADDrr},
2245 {ARM::ADDSrsi, ARM::ADDrsi},
2246 {ARM::ADDSrsr, ARM::ADDrsr},
2247
2248 {ARM::SUBSri, ARM::SUBri},
2249 {ARM::SUBSrr, ARM::SUBrr},
2250 {ARM::SUBSrsi, ARM::SUBrsi},
2251 {ARM::SUBSrsr, ARM::SUBrsr},
2252
2253 {ARM::RSBSri, ARM::RSBri},
2254 {ARM::RSBSrsi, ARM::RSBrsi},
2255 {ARM::RSBSrsr, ARM::RSBrsr},
2256
2257 {ARM::tADDSi3, ARM::tADDi3},
2258 {ARM::tADDSi8, ARM::tADDi8},
2259 {ARM::tADDSrr, ARM::tADDrr},
2260 {ARM::tADCS, ARM::tADC},
2261
2262 {ARM::tSUBSi3, ARM::tSUBi3},
2263 {ARM::tSUBSi8, ARM::tSUBi8},
2264 {ARM::tSUBSrr, ARM::tSUBrr},
2265 {ARM::tSBCS, ARM::tSBC},
2266 {ARM::tRSBS, ARM::tRSB},
2267 {ARM::tLSLSri, ARM::tLSLri},
2268
2269 {ARM::t2ADDSri, ARM::t2ADDri},
2270 {ARM::t2ADDSrr, ARM::t2ADDrr},
2271 {ARM::t2ADDSrs, ARM::t2ADDrs},
2272
2273 {ARM::t2SUBSri, ARM::t2SUBri},
2274 {ARM::t2SUBSrr, ARM::t2SUBrr},
2275 {ARM::t2SUBSrs, ARM::t2SUBrs},
2276
2277 {ARM::t2RSBSri, ARM::t2RSBri},
2278 {ARM::t2RSBSrs, ARM::t2RSBrs},
2279};
2280
2281unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2282 for (const auto &Entry : AddSubFlagsOpcodeMap)
2283 if (OldOpc == Entry.PseudoOpc)
2284 return Entry.MachineOpc;
2285 return 0;
2286}
2287
// Emit a sequence of ARM-mode instructions computing DestReg = BaseReg +
// NumBytes, predicated on Pred/PredReg. A zero adjustment becomes a plain
// MOV; otherwise the immediate is split into ADDri/SUBri chunks, each of
// which fits the ARM rotated-8-bit (so_imm) encoding.
// NOTE(review): the extract dropped the signature lines (2288-2289,
// llvm::emitARMRegPlusImmediate and its MBB/MBBI parameters).
2290 const DebugLoc &dl, Register DestReg,
2291 Register BaseReg, int NumBytes,
2292 ARMCC::CondCodes Pred, Register PredReg,
2293 const ARMBaseInstrInfo &TII,
2294 unsigned MIFlags) {
2295 if (NumBytes == 0 && DestReg != BaseReg) {
2296 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2297 .addReg(BaseReg, RegState::Kill)
2298 .add(predOps(Pred, PredReg))
2299 .add(condCodeOp())
2300 .setMIFlags(MIFlags);
2301 return;
2302 }
2303
2304 bool isSub = NumBytes < 0;
2305 if (isSub) NumBytes = -NumBytes;
2306
// Peel off one encodable rotated-8-bit field per iteration until the whole
// offset has been materialized.
2307 while (NumBytes) {
2308 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2309 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2310 assert(ThisVal && "Didn't extract field correctly");
2311
2312 // We will handle these bits from offset, clear them.
2313 NumBytes &= ~ThisVal;
2314
2315 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2316
2317 // Build the new ADD / SUB.
2318 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2319 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2320 .addReg(BaseReg, RegState::Kill)
2321 .addImm(ThisVal)
2322 .add(predOps(Pred, PredReg))
2323 .add(condCodeOp())
2324 .setMIFlags(MIFlags);
// Subsequent chunks accumulate on top of what has been computed so far.
2325 BaseReg = DestReg;
2326 }
2327}
2328
// Try to absorb an SP adjustment of NumBytes into an adjacent push/pop by
// widening its register list with scratch (push) or dead (pop) registers.
// Only done at minsize, since this trades micro-ops for code size. Returns
// true and rewrites MI's register list on success.
// NOTE(review): the extract dropped several hyperlinked lines: the signature
// (2329-2330), the RegList declaration (2379), the TRI initialization (2385),
// and the MachineBasicBlock::LQR_Dead comparand (2419); verify upstream.
2331 unsigned NumBytes) {
2332 // This optimisation potentially adds lots of load and store
2333 // micro-operations, it's only really a great benefit to code-size.
2334 if (!Subtarget.hasMinSize())
2335 return false;
2336
2337 // If only one register is pushed/popped, LLVM can use an LDR/STR
2338 // instead. We can't modify those so make sure we're dealing with an
2339 // instruction we understand.
2340 bool IsPop = isPopOpcode(MI->getOpcode());
2341 bool IsPush = isPushOpcode(MI->getOpcode());
2342 if (!IsPush && !IsPop)
2343 return false;
2344
2345 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2346 MI->getOpcode() == ARM::VLDMDIA_UPD;
2347 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2348 MI->getOpcode() == ARM::tPOP ||
2349 MI->getOpcode() == ARM::tPOP_RET;
2350
2351 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2352 MI->getOperand(1).getReg() == ARM::SP)) &&
2353 "trying to fold sp update into non-sp-updating push/pop");
2354
2355 // The VFP push & pop act on D-registers, so we can only fold an adjustment
2356 // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try
2357 // if this is violated.
2358 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2359 return false;
2360
2361 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2362 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2363 int RegListIdx = IsT1PushPop ? 2 : 4;
2364
2365 // Calculate the space we'll need in terms of registers.
2366 unsigned RegsNeeded;
2367 const TargetRegisterClass *RegClass;
2368 if (IsVFPPushPop) {
2369 RegsNeeded = NumBytes / 8;
2370 RegClass = &ARM::DPRRegClass;
2371 } else {
2372 RegsNeeded = NumBytes / 4;
2373 RegClass = &ARM::GPRRegClass;
2374 }
2375
2376 // We're going to have to strip all list operands off before
2377 // re-adding them since the order matters, so save the existing ones
2378 // for later.
2380
2381 // We're also going to need the first register transferred by this
2382 // instruction, which won't necessarily be the first register in the list.
2383 unsigned FirstRegEnc = -1;
2384
2386 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2387 MachineOperand &MO = MI->getOperand(i);
2388 RegList.push_back(MO);
2389
2390 if (MO.isReg() && !MO.isImplicit() &&
2391 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2392 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2393 }
2394
2395 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2396
2397 // Now try to find enough space in the reglist to allocate NumBytes.
// Scan downward from just below the lowest register already in the list.
2398 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2399 --CurRegEnc) {
2400 MCRegister CurReg = RegClass->getRegister(CurRegEnc);
// Thumb1 push/pop can only encode r0-r7 (plus lr/pc handled elsewhere).
2401 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2402 continue;
2403 if (!IsPop) {
2404 // Pushing any register is completely harmless, mark the register involved
2405 // as undef since we don't care about its value and must not restore it
2406 // during stack unwinding.
2407 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2408 false, false, true));
2409 --RegsNeeded;
2410 continue;
2411 }
2412
2413 // However, we can only pop an extra register if it's not live. For
2414 // registers live within the function we might clobber a return value
2415 // register; the other way a register can be live here is if it's
2416 // callee-saved.
2417 if (isCalleeSavedRegister(CurReg, CSRegs) ||
2418 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2420 // VFP pops don't allow holes in the register list, so any skip is fatal
2421 // for our transformation. GPR pops do, so we should just keep looking.
2422 if (IsVFPPushPop)
2423 return false;
2424 else
2425 continue;
2426 }
2427
2428 // Mark the unimportant registers as <def,dead> in the POP.
2429 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2430 true));
2431 --RegsNeeded;
2432 }
2433
// Couldn't find room for the whole adjustment: leave MI untouched.
2434 if (RegsNeeded > 0)
2435 return false;
2436
2437 // Finally we know we can profitably perform the optimisation so go
2438 // ahead: strip all existing registers off and add them back again
2439 // in the right order.
2440 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2441 MI->removeOperand(i);
2442
2443 // Add the complete list back in.
2444 MachineInstrBuilder MIB(MF, &*MI);
2445 for (const MachineOperand &MO : llvm::reverse(RegList))
2446 MIB.add(MO);
2447
2448 return true;
2449}
2450
// Rewrite the frame-index operand of MI (at FrameRegIdx) to use FrameReg
// plus an immediate. Folds as much of Offset into the instruction's
// immediate field as its addressing mode allows; the unencodable remainder
// is returned through Offset. Returns true iff the full offset was folded.
// NOTE(review): the extract dropped several hyperlinked lines: the
// AddrMode2 assignment for inline asm (2461), the AddrMode_i12 case label
// (2505), the AddrMode5FP16 case label (2536), the T2_i7* case labels
// (2544-2546), and the "if (AddrMode == ARMII::AddrMode_i12)" guards
// (2577, 2590); verify against upstream.
2451bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2452 Register FrameReg, int &Offset,
2453 const ARMBaseInstrInfo &TII) {
2454 unsigned Opcode = MI.getOpcode();
2455 const MCInstrDesc &Desc = MI.getDesc();
2456 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2457 bool isSub = false;
2458
2459 // Memory operands in inline assembly always use AddrMode2.
2460 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2462
2463 if (Opcode == ARM::ADDri) {
2464 Offset += MI.getOperand(FrameRegIdx+1).getImm();
2465 if (Offset == 0) {
2466 // Turn it into a move.
2467 MI.setDesc(TII.get(ARM::MOVr));
2468 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2469 MI.removeOperand(FrameRegIdx+1);
2470 Offset = 0;
2471 return true;
2472 } else if (Offset < 0) {
2473 Offset = -Offset;
2474 isSub = true;
2475 MI.setDesc(TII.get(ARM::SUBri));
2476 }
2477
2478 // Common case: small offset, fits into instruction.
2479 if (ARM_AM::getSOImmVal(Offset) != -1) {
2480 // Replace the FrameIndex with sp / fp
2481 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2482 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2483 Offset = 0;
2484 return true;
2485 }
2486
2487 // Otherwise, pull as much of the immediate into this ADDri/SUBri
2488 // as possible.
2489 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2490 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
2491
2492 // We will handle these bits from offset, clear them.
2493 Offset &= ~ThisImmVal;
2494
2495 // Get the properly encoded SOImmVal field.
2496 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2497 "Bit extraction didn't work?");
2498 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2499 } else {
// Memory-access case: decode the addressing mode's immediate field,
// its bit-width (NumBits) and the scale its units are measured in.
2500 unsigned ImmIdx = 0;
2501 int InstrOffs = 0;
2502 unsigned NumBits = 0;
2503 unsigned Scale = 1;
2504 switch (AddrMode) {
2506 ImmIdx = FrameRegIdx + 1;
2507 InstrOffs = MI.getOperand(ImmIdx).getImm();
2508 NumBits = 12;
2509 break;
2510 case ARMII::AddrMode2:
2511 ImmIdx = FrameRegIdx+2;
2512 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2513 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2514 InstrOffs *= -1;
2515 NumBits = 12;
2516 break;
2517 case ARMII::AddrMode3:
2518 ImmIdx = FrameRegIdx+2;
2519 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2520 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2521 InstrOffs *= -1;
2522 NumBits = 8;
2523 break;
2524 case ARMII::AddrMode4:
2525 case ARMII::AddrMode6:
2526 // Can't fold any offset even if it's zero.
2527 return false;
2528 case ARMII::AddrMode5:
2529 ImmIdx = FrameRegIdx+1;
2530 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2531 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2532 InstrOffs *= -1;
2533 NumBits = 8;
2534 Scale = 4;
2535 break;
2537 ImmIdx = FrameRegIdx+1;
2538 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2539 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2540 InstrOffs *= -1;
2541 NumBits = 8;
2542 Scale = 2;
2543 break;
2547 ImmIdx = FrameRegIdx+1;
2548 InstrOffs = MI.getOperand(ImmIdx).getImm();
2549 NumBits = 7;
2550 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2551 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2552 break;
2553 default:
2554 llvm_unreachable("Unsupported addressing mode!");
2555 }
2556
2557 Offset += InstrOffs * Scale;
2558 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2559 if (Offset < 0) {
2560 Offset = -Offset;
2561 isSub = true;
2562 }
2563
2564 // Attempt to fold address comp. if opcode has offset bits
2565 if (NumBits > 0) {
2566 // Common case: small offset, fits into instruction.
2567 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2568 int ImmedOffset = Offset / Scale;
2569 unsigned Mask = (1 << NumBits) - 1;
2570 if ((unsigned)Offset <= Mask * Scale) {
2571 // Replace the FrameIndex with sp
2572 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2573 // FIXME: When addrmode2 goes away, this will simplify (like the
2574 // T2 version), as the LDR.i12 versions don't need the encoding
2575 // tricks for the offset value.
2576 if (isSub) {
2578 ImmedOffset = -ImmedOffset;
2579 else
2580 ImmedOffset |= 1 << NumBits;
2581 }
2582 ImmOp.ChangeToImmediate(ImmedOffset);
2583 Offset = 0;
2584 return true;
2585 }
2586
2587 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2588 ImmedOffset = ImmedOffset & Mask;
2589 if (isSub) {
2591 ImmedOffset = -ImmedOffset;
2592 else
2593 ImmedOffset |= 1 << NumBits;
2594 }
2595 ImmOp.ChangeToImmediate(ImmedOffset);
2596 Offset &= ~(Mask*Scale);
2597 }
2598 }
2599
2600 Offset = (isSub) ? -Offset : Offset;
2601 return Offset == 0;
2602}
2603
2604/// analyzeCompare - For a comparison instruction, return the source registers
2605/// in SrcReg and SrcReg2 if having two register operands, and the value it
2606/// compares against in CmpValue. Return true if the comparison instruction
2607/// can be analyzed.
// Handled forms: reg-vs-immediate compares (CMPri/t2CMPri/tCMPi8),
// reg-vs-reg compares (CMPrr/t2CMPrr/tCMPr), and bit tests (TSTri/t2TSTri,
// which report the mask through CmpMask instead of CmpValue).
// NOTE(review): the extract dropped the signature line (2608).
2609 Register &SrcReg2, int64_t &CmpMask,
2610 int64_t &CmpValue) const {
2611 switch (MI.getOpcode()) {
2612 default: break;
2613 case ARM::CMPri:
2614 case ARM::t2CMPri:
2615 case ARM::tCMPi8:
2616 SrcReg = MI.getOperand(0).getReg();
2617 SrcReg2 = 0;
2618 CmpMask = ~0;
2619 CmpValue = MI.getOperand(1).getImm();
2620 return true;
2621 case ARM::CMPrr:
2622 case ARM::t2CMPrr:
2623 case ARM::tCMPr:
2624 SrcReg = MI.getOperand(0).getReg();
2625 SrcReg2 = MI.getOperand(1).getReg();
2626 CmpMask = ~0;
2627 CmpValue = 0;
2628 return true;
2629 case ARM::TSTri:
2630 case ARM::t2TSTri:
2631 SrcReg = MI.getOperand(0).getReg();
2632 SrcReg2 = 0;
2633 CmpMask = MI.getOperand(1).getImm();
2634 CmpValue = 0;
2635 return true;
2636 }
2637
2638 return false;
2639}
2640
2641/// isSuitableForMask - Identify a suitable 'and' instruction that
2642/// operates on the given source register and applies the same mask
2643/// as a 'tst' instruction. Provide a limited look-through for copies.
2644/// When successful, MI will hold the found instruction.
// When CommonUse is set, SrcReg is expected to be the AND's input operand
// (operand 1) rather than its result (operand 0).
// NOTE(review): the extract dropped the signature line (2645).
2646 int CmpMask, bool CommonUse) {
2647 switch (MI->getOpcode()) {
2648 case ARM::ANDri:
2649 case ARM::t2ANDri:
2650 if (CmpMask != MI->getOperand(2).getImm())
2651 return false;
2652 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2653 return true;
2654 break;
2655 }
2656
2657 return false;
2658}
2659
2660/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2661/// the condition code if we modify the instructions such that flags are
2662/// set by ADD(a,b,X).
// Only the carry-based (HS/LO, which swap) and overflow-based (VS/VC, which
// are preserved) conditions are convertible; any other input yields AL to
// signal "not convertible".
// NOTE(review): the extract dropped the signature line (2663).
2664 switch (CC) {
2665 default: return ARMCC::AL;
2666 case ARMCC::HS: return ARMCC::LO;
2667 case ARMCC::LO: return ARMCC::HS;
2668 case ARMCC::VS: return ARMCC::VS;
2669 case ARMCC::VC: return ARMCC::VC;
2670 }
2671}
2672
2673/// isRedundantFlagInstr - check whether the first instruction, whose only
2674/// purpose is to update flags, can be made redundant.
2675/// CMPrr can be made redundant by SUBrr if the operands are the same.
2676/// CMPri can be made redundant by SUBri if the operands are the same.
2677/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2678/// This function can be extended later on.
2679inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2680 Register SrcReg, Register SrcReg2,
2681 int64_t ImmValue,
2682 const MachineInstr *OI,
2683 bool &IsThumb1) {
2684 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2685 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2686 ((OI->getOperand(1).getReg() == SrcReg &&
2687 OI->getOperand(2).getReg() == SrcReg2) ||
2688 (OI->getOperand(1).getReg() == SrcReg2 &&
2689 OI->getOperand(2).getReg() == SrcReg))) {
2690 IsThumb1 = false;
2691 return true;
2692 }
2693
2694 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2695 ((OI->getOperand(2).getReg() == SrcReg &&
2696 OI->getOperand(3).getReg() == SrcReg2) ||
2697 (OI->getOperand(2).getReg() == SrcReg2 &&
2698 OI->getOperand(3).getReg() == SrcReg))) {
2699 IsThumb1 = true;
2700 return true;
2701 }
2702
2703 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2704 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2705 OI->getOperand(1).getReg() == SrcReg &&
2706 OI->getOperand(2).getImm() == ImmValue) {
2707 IsThumb1 = false;
2708 return true;
2709 }
2710
2711 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2712 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2713 OI->getOperand(2).getReg() == SrcReg &&
2714 OI->getOperand(3).getImm() == ImmValue) {
2715 IsThumb1 = true;
2716 return true;
2717 }
2718
2719 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2720 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2721 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2722 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2723 OI->getOperand(0).getReg() == SrcReg &&
2724 OI->getOperand(1).getReg() == SrcReg2) {
2725 IsThumb1 = false;
2726 return true;
2727 }
2728
2729 if (CmpI->getOpcode() == ARM::tCMPr &&
2730 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2731 OI->getOpcode() == ARM::tADDrr) &&
2732 OI->getOperand(0).getReg() == SrcReg &&
2733 OI->getOperand(2).getReg() == SrcReg2) {
2734 IsThumb1 = true;
2735 return true;
2736 }
2737
2738 return false;
2739}
2740
2741static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2742 switch (MI->getOpcode()) {
2743 default: return false;
2744 case ARM::tLSLri:
2745 case ARM::tLSRri:
2746 case ARM::tLSLrr:
2747 case ARM::tLSRrr:
2748 case ARM::tSUBrr:
2749 case ARM::tADDrr:
2750 case ARM::tADDi3:
2751 case ARM::tADDi8:
2752 case ARM::tSUBi3:
2753 case ARM::tSUBi8:
2754 case ARM::tMUL:
2755 case ARM::tADC:
2756 case ARM::tSBC:
2757 case ARM::tRSB:
2758 case ARM::tAND:
2759 case ARM::tORR:
2760 case ARM::tEOR:
2761 case ARM::tBIC:
2762 case ARM::tMVN:
2763 case ARM::tASRri:
2764 case ARM::tASRrr:
2765 case ARM::tROR:
2766 IsThumb1 = true;
2767 [[fallthrough]];
2768 case ARM::RSBrr:
2769 case ARM::RSBri:
2770 case ARM::RSCrr:
2771 case ARM::RSCri:
2772 case ARM::ADDrr:
2773 case ARM::ADDri:
2774 case ARM::ADCrr:
2775 case ARM::ADCri:
2776 case ARM::SUBrr:
2777 case ARM::SUBri:
2778 case ARM::SBCrr:
2779 case ARM::SBCri:
2780 case ARM::t2RSBri:
2781 case ARM::t2ADDrr:
2782 case ARM::t2ADDri:
2783 case ARM::t2ADCrr:
2784 case ARM::t2ADCri:
2785 case ARM::t2SUBrr:
2786 case ARM::t2SUBri:
2787 case ARM::t2SBCrr:
2788 case ARM::t2SBCri:
2789 case ARM::ANDrr:
2790 case ARM::ANDri:
2791 case ARM::ANDrsr:
2792 case ARM::ANDrsi:
2793 case ARM::t2ANDrr:
2794 case ARM::t2ANDri:
2795 case ARM::t2ANDrs:
2796 case ARM::ORRrr:
2797 case ARM::ORRri:
2798 case ARM::ORRrsr:
2799 case ARM::ORRrsi:
2800 case ARM::t2ORRrr:
2801 case ARM::t2ORRri:
2802 case ARM::t2ORRrs:
2803 case ARM::EORrr:
2804 case ARM::EORri:
2805 case ARM::EORrsr:
2806 case ARM::EORrsi:
2807 case ARM::t2EORrr:
2808 case ARM::t2EORri:
2809 case ARM::t2EORrs:
2810 case ARM::BICri:
2811 case ARM::BICrr:
2812 case ARM::BICrsi:
2813 case ARM::BICrsr:
2814 case ARM::t2BICri:
2815 case ARM::t2BICrr:
2816 case ARM::t2BICrs:
2817 case ARM::t2LSRri:
2818 case ARM::t2LSRrr:
2819 case ARM::t2LSLri:
2820 case ARM::t2LSLrr:
2821 case ARM::MOVsr:
2822 case ARM::MOVsi:
2823 return true;
2824 }
2825}
2826
2827/// optimizeCompareInstr - Convert the instruction supplying the argument to the
2828/// comparison into one that sets the zero bit in the flags register;
2829/// Remove a redundant Compare instruction if an earlier instruction can set the
2830/// flags in the same way as Compare.
2831/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2832/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2833/// condition code of instructions which use the flags.
2835 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
2836 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
2837 // Get the unique definition of SrcReg.
2838 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2839 if (!MI) return false;
2840
2841 // Masked compares sometimes use the same register as the corresponding 'and'.
2842 if (CmpMask != ~0) {
2843 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2844 MI = nullptr;
2846 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2847 UI != UE; ++UI) {
2848 if (UI->getParent() != CmpInstr.getParent())
2849 continue;
2850 MachineInstr *PotentialAND = &*UI;
2851 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2852 isPredicated(*PotentialAND))
2853 continue;
2854 MI = PotentialAND;
2855 break;
2856 }
2857 if (!MI) return false;
2858 }
2859 }
2860
2861 // Get ready to iterate backward from CmpInstr.
2862 MachineBasicBlock::iterator I = CmpInstr, E = MI,
2863 B = CmpInstr.getParent()->begin();
2864
2865 // Early exit if CmpInstr is at the beginning of the BB.
2866 if (I == B) return false;
2867
2868 // There are two possible candidates which can be changed to set CPSR:
2869 // One is MI, the other is a SUB or ADD instruction.
2870 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2871 // ADDr[ri](r1, r2, X).
2872 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2873 MachineInstr *SubAdd = nullptr;
2874 if (SrcReg2 != 0)
2875 // MI is not a candidate for CMPrr.
2876 MI = nullptr;
2877 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2878 // Conservatively refuse to convert an instruction which isn't in the same
2879 // BB as the comparison.
2880 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2881 // Thus we cannot return here.
2882 if (CmpInstr.getOpcode() == ARM::CMPri ||
2883 CmpInstr.getOpcode() == ARM::t2CMPri ||
2884 CmpInstr.getOpcode() == ARM::tCMPi8)
2885 MI = nullptr;
2886 else
2887 return false;
2888 }
2889
2890 bool IsThumb1 = false;
2891 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2892 return false;
2893
2894 // We also want to do this peephole for cases like this: if (a*b == 0),
2895 // and optimise away the CMP instruction from the generated code sequence:
2896 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2897 // resulting from the select instruction, but these MOVS instructions for
2898 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2899 // However, if we only have MOVS instructions in between the CMP and the
2900 // other instruction (the MULS in this example), then the CPSR is dead so we
2901 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2902 // reordering and then continue the analysis hoping we can eliminate the
2903 // CMP. This peephole works on the vregs, so is still in SSA form. As a
2904 // consequence, the movs won't redefine/kill the MUL operands which would
2905 // make this reordering illegal.
2907 if (MI && IsThumb1) {
2908 --I;
2909 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
2910 bool CanReorder = true;
2911 for (; I != E; --I) {
2912 if (I->getOpcode() != ARM::tMOVi8) {
2913 CanReorder = false;
2914 break;
2915 }
2916 }
2917 if (CanReorder) {
2918 MI = MI->removeFromParent();
2919 E = CmpInstr;
2920 CmpInstr.getParent()->insert(E, MI);
2921 }
2922 }
2923 I = CmpInstr;
2924 E = MI;
2925 }
2926
2927 // Check that CPSR isn't set between the comparison instruction and the one we
2928 // want to change. At the same time, search for SubAdd.
2929 bool SubAddIsThumb1 = false;
2930 do {
2931 const MachineInstr &Instr = *--I;
2932
2933 // Check whether CmpInstr can be made redundant by the current instruction.
2934 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
2935 SubAddIsThumb1)) {
2936 SubAdd = &*I;
2937 break;
2938 }
2939
2940 // Allow E (which was initially MI) to be SubAdd but do not search before E.
2941 if (I == E)
2942 break;
2943
2944 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2945 Instr.readsRegister(ARM::CPSR, TRI))
2946 // This instruction modifies or uses CPSR after the one we want to
2947 // change. We can't do this transformation.
2948 return false;
2949
2950 if (I == B) {
2951 // In some cases, we scan the use-list of an instruction for an AND;
2952 // that AND is in the same BB, but may not be scheduled before the
2953 // corresponding TST. In that case, bail out.
2954 //
2955 // FIXME: We could try to reschedule the AND.
2956 return false;
2957 }
2958 } while (true);
2959
2960 // Return false if no candidates exist.
2961 if (!MI && !SubAdd)
2962 return false;
2963
2964 // If we found a SubAdd, use it as it will be closer to the CMP
2965 if (SubAdd) {
2966 MI = SubAdd;
2967 IsThumb1 = SubAddIsThumb1;
2968 }
2969
2970 // We can't use a predicated instruction - it doesn't always write the flags.
2971 if (isPredicated(*MI))
2972 return false;
2973
2974 // Scan forward for the use of CPSR
2975 // When checking against MI: if it's a conditional code that requires
2976 // checking of the V bit or C bit, then this is not safe to do.
2977 // It is safe to remove CmpInstr if CPSR is redefined or killed.
2978 // If we are done with the basic block, we need to check whether CPSR is
2979 // live-out.
2981 OperandsToUpdate;
2982 bool isSafe = false;
2983 I = CmpInstr;
2984 E = CmpInstr.getParent()->end();
2985 while (!isSafe && ++I != E) {
2986 const MachineInstr &Instr = *I;
2987 for (unsigned IO = 0, EO = Instr.getNumOperands();
2988 !isSafe && IO != EO; ++IO) {
2989 const MachineOperand &MO = Instr.getOperand(IO);
2990 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
2991 isSafe = true;
2992 break;
2993 }
2994 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
2995 continue;
2996 if (MO.isDef()) {
2997 isSafe = true;
2998 break;
2999 }
3000 // Condition code is after the operand before CPSR except for VSELs.
3002 bool IsInstrVSel = true;
3003 switch (Instr.getOpcode()) {
3004 default:
3005 IsInstrVSel = false;
3006 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3007 break;
3008 case ARM::VSELEQD:
3009 case ARM::VSELEQS:
3010 case ARM::VSELEQH:
3011 CC = ARMCC::EQ;
3012 break;
3013 case ARM::VSELGTD:
3014 case ARM::VSELGTS:
3015 case ARM::VSELGTH:
3016 CC = ARMCC::GT;
3017 break;
3018 case ARM::VSELGED:
3019 case ARM::VSELGES:
3020 case ARM::VSELGEH:
3021 CC = ARMCC::GE;
3022 break;
3023 case ARM::VSELVSD:
3024 case ARM::VSELVSS:
3025 case ARM::VSELVSH:
3026 CC = ARMCC::VS;
3027 break;
3028 }
3029
3030 if (SubAdd) {
3031 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3032 // on CMP needs to be updated to be based on SUB.
3033 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3034 // needs to be modified.
3035 // Push the condition code operands to OperandsToUpdate.
3036 // If it is safe to remove CmpInstr, the condition code of these
3037 // operands will be modified.
3038 unsigned Opc = SubAdd->getOpcode();
3039 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3040 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3041 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3042 Opc == ARM::tSUBi8;
3043 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3044 if (!IsSub ||
3045 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3046 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3047 // VSel doesn't support condition code update.
3048 if (IsInstrVSel)
3049 return false;
3050 // Ensure we can swap the condition.
3051 ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3052 if (NewCC == ARMCC::AL)
3053 return false;
3054 OperandsToUpdate.push_back(
3055 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3056 }
3057 } else {
3058 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3059 switch (CC) {
3060 case ARMCC::EQ: // Z
3061 case ARMCC::NE: // Z
3062 case ARMCC::MI: // N
3063 case ARMCC::PL: // N
3064 case ARMCC::AL: // none
3065 // CPSR can be used multiple times, we should continue.
3066 break;
3067 case ARMCC::HS: // C
3068 case ARMCC::LO: // C
3069 case ARMCC::VS: // V
3070 case ARMCC::VC: // V
3071 case ARMCC::HI: // C Z
3072 case ARMCC::LS: // C Z
3073 case ARMCC::GE: // N V
3074 case ARMCC::LT: // N V
3075 case ARMCC::GT: // Z N V
3076 case ARMCC::LE: // Z N V
3077 // The instruction uses the V bit or C bit which is not safe.
3078 return false;
3079 }
3080 }
3081 }
3082 }
3083
3084 // If CPSR is not killed nor re-defined, we should check whether it is
3085 // live-out. If it is live-out, do not optimize.
3086 if (!isSafe) {
3087 MachineBasicBlock *MBB = CmpInstr.getParent();
3088 for (MachineBasicBlock *Succ : MBB->successors())
3089 if (Succ->isLiveIn(ARM::CPSR))
3090 return false;
3091 }
3092
3093 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3094 // set CPSR so this is represented as an explicit output)
3095 if (!IsThumb1) {
3096 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3097 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3098 MI->getOperand(CPSRRegNum).setIsDef(true);
3099 }
3100 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3101 CmpInstr.eraseFromParent();
3102
3103 // Modify the condition code of operands in OperandsToUpdate.
3104 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3105 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3106 for (auto &[MO, Cond] : OperandsToUpdate)
3107 MO->setImm(Cond);
3108
3109 MI->clearRegisterDeads(ARM::CPSR);
3110
3111 return true;
3112}
3113
  // Do not sink MI if it might be used to optimize a redundant compare.
  // We heuristically only look at the instruction immediately following MI to
  // avoid potentially searching the entire basic block.
  if (isPredicated(MI))
    return true;
  ++Next;
  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  bool IsThumb1;
  // If the very next instruction is a compare that MI already makes
  // redundant, sinking MI would block the compare elimination; keep MI
  // where it is.
  if (Next != MI.getParent()->end() &&
      analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
      isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
    return false;
  return true;
}
3131
                                     Register Reg,
                                     MachineRegisterInfo *MRI) const {
  // Fold large immediates into add, sub, or, xor.
  // DefMI materializes a 32-bit constant (MOVi32imm and friends); if its
  // single use is a register-register ADD/SUB/ORR/EOR whose constant can be
  // expressed as a two-part shifter-operand immediate, rewrite the use into
  // an immediate-form instruction pair and delete DefMI.
  unsigned DefOpc = DefMI.getOpcode();
  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
      DefOpc != ARM::tMOVi32imm)
    return false;
  if (!DefMI.getOperand(1).isImm())
    // Could be t2MOVi32imm @xx (a symbolic operand, not a plain constant).
    return false;

  // Only safe and profitable when the constant has exactly one non-debug use.
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  const MCInstrDesc &DefMCID = DefMI.getDesc();
  if (DefMCID.hasOptionalDef()) {
    unsigned NumOps = DefMCID.getNumOperands();
    const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
    if (MO.getReg() == ARM::CPSR && !MO.isDead())
      // If DefMI defines CPSR and it is not dead, it's obviously not safe
      // to delete DefMI.
      return false;
  }

  const MCInstrDesc &UseMCID = UseMI.getDesc();
  if (UseMCID.hasOptionalDef()) {
    unsigned NumOps = UseMCID.getNumOperands();
    if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
      // If the instruction sets the flag, do not attempt this optimization
      // since it may change the semantics of the code.
      return false;
  }

  unsigned UseOpc = UseMI.getOpcode();
  unsigned NewUseOpc = 0;
  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
  uint32_t SOImmValV1 = 0, SOImmValV2 = 0; // the two immediate halves
  bool Commute = false; // set when the constant is operand 1, not operand 2
  switch (UseOpc) {
  default: return false;
  case ARM::SUBrr:
  case ARM::ADDrr:
  case ARM::ORRrr:
  case ARM::EORrr:
  case ARM::t2SUBrr:
  case ARM::t2ADDrr:
  case ARM::t2ORRrr:
  case ARM::t2EORrr: {
    Commute = UseMI.getOperand(2).getReg() != Reg;
    switch (UseOpc) {
    default: break;
    case ARM::ADDrr:
    case ARM::SUBrr:
      // SUB is not commutative: the constant must be the subtrahend.
      if (UseOpc == ARM::SUBrr && Commute)
        return false;

      // ADD/SUB are special because they're essentially the same operation, so
      // we can handle a larger range of immediates.
      if (ARM_AM::isSOImmTwoPartVal(ImmVal))
        NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
      else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
        // Negate the constant and flip ADD<->SUB instead.
        ImmVal = -ImmVal;
        NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
      } else
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      break;
    case ARM::ORRrr:
    case ARM::EORrr:
      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
      }
      break;
    case ARM::t2ADDrr:
    case ARM::t2SUBrr: {
      if (UseOpc == ARM::t2SUBrr && Commute)
        return false;

      // ADD/SUB are special because they're essentially the same operation, so
      // we can handle a larger range of immediates.
      // Writes to SP need the dedicated SP-immediate opcodes.
      const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
      const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
      const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
      if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
      else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
        ImmVal = -ImmVal;
        NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
      } else
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      break;
    }
    case ARM::t2ORRrr:
    case ARM::t2EORrr:
      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
      }
      break;
    }
  }
  }

  unsigned OpIdx = Commute ? 2 : 1;
  Register Reg1 = UseMI.getOperand(OpIdx).getReg();
  bool isKill = UseMI.getOperand(OpIdx).isKill();
  const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
  Register NewReg = MRI->createVirtualRegister(TRC);
  // Emit the first half of the immediate pair just before UseMI...
  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
          NewReg)
      .addReg(Reg1, getKillRegState(isKill))
      .addImm(SOImmValV1)
      .add(condCodeOp());
  // ...and turn UseMI itself into the second half, consuming NewReg.
  UseMI.setDesc(get(NewUseOpc));
  UseMI.getOperand(1).setReg(NewReg);
  UseMI.getOperand(1).setIsKill();
  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
  DefMI.eraseFromParent();
  // FIXME: t2ADDrr should be split, as different rules apply when writing to SP.
  // Just as t2ADDri, that was split to [t2ADDri, t2ADDspImm].
  // Then the below code will not be needed, as the input/output register
  // classes will be rgpr or gprSP.
  // For now, we fix the UseMI operand explicitly here:
  switch(NewUseOpc){
    case ARM::t2ADDspImm:
    case ARM::t2SUBspImm:
    case ARM::t2ADDri:
    case ARM::t2SUBri:
      MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
  }
  return true;
}
3280
/// Swift-specific micro-op count for a load/store \p MI.
/// The cases below encode a per-opcode table: expensive addressing modes
/// (subtracted or shifted register offsets, pre/post writeback, destination
/// equal to the index register) cost extra micro-ops. Opcodes not listed
/// fall back to the scheduling itinerary.
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
                                        const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: {
    // Not special-cased: trust the itinerary data.
    const MCInstrDesc &Desc = MI.getDesc();
    int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
    assert(UOps >= 0 && "bad # UOps");
    return UOps;
  }

  case ARM::LDRrs:
  case ARM::LDRBrs:
  case ARM::STRrs:
  case ARM::STRBrs: {
    // AM2 register offset: 1 uop only for [r, +r] or [r, +r, lsl #1/2/3];
    // subtracted or otherwise-shifted offsets need 2.
    unsigned ShOpVal = MI.getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRH:
  case ARM::STRH: {
    // No index register (immediate form): a single uop.
    if (!MI.getOperand(2).getReg())
      return 1;

    unsigned ShOpVal = MI.getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRSB:
  case ARM::LDRSH:
    return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;

  case ARM::LDRSB_POST:
  case ARM::LDRSH_POST: {
    // Destination aliasing the index register costs one more uop.
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    return (Rt == Rm) ? 4 : 3;
  }

  case ARM::LDR_PRE_REG:
  case ARM::LDRB_PRE_REG: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    if (Rt == Rm)
      return 3;
    unsigned ShOpVal = MI.getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::STR_PRE_REG:
  case ARM::STRB_PRE_REG: {
    unsigned ShOpVal = MI.getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::LDRH_PRE:
  case ARM::STRH_PRE: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    if (!Rm)
      return 2;
    if (Rt == Rm)
      return 3;
    return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
  }

  case ARM::LDR_POST_REG:
  case ARM::LDRB_POST_REG:
  case ARM::LDRH_POST: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    return (Rt == Rm) ? 3 : 2;
  }

  case ARM::LDR_PRE_IMM:
  case ARM::LDRB_PRE_IMM:
  case ARM::LDR_POST_IMM:
  case ARM::LDRB_POST_IMM:
  case ARM::STRB_POST_IMM:
  case ARM::STRB_POST_REG:
  case ARM::STRB_PRE_IMM:
  case ARM::STRH_POST:
  case ARM::STR_POST_IMM:
  case ARM::STR_POST_REG:
  case ARM::STR_PRE_IMM:
    return 2;

  case ARM::LDRSB_PRE:
  case ARM::LDRSH_PRE: {
    Register Rm = MI.getOperand(3).getReg();
    if (Rm == 0)
      return 3;
    Register Rt = MI.getOperand(0).getReg();
    if (Rt == Rm)
      return 4;
    unsigned ShOpVal = MI.getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 3;
    return 4;
  }

  case ARM::LDRD: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(2).getReg();
    Register Rm = MI.getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
                                                                          : 3;
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::STRD: {
    Register Rm = MI.getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
                                                                          : 3;
    return 2;
  }

  case ARM::LDRD_POST:
  case ARM::t2LDRD_POST:
    return 3;

  case ARM::STRD_POST:
  case ARM::t2STRD_POST:
    return 4;

  case ARM::LDRD_PRE: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(3).getReg();
    Register Rm = MI.getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
                                                                          : 4;
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::t2LDRD_PRE: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(3).getReg();
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::STRD_PRE: {
    Register Rm = MI.getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
                                                                          : 4;
    return 3;
  }

  case ARM::t2STRD_PRE:
    return 3;

  case ARM::t2LDR_POST:
  case ARM::t2LDRB_POST:
  case ARM::t2LDRB_PRE:
  case ARM::t2LDRSBi12:
  case ARM::t2LDRSBi8:
  case ARM::t2LDRSBpci:
  case ARM::t2LDRSBs:
  case ARM::t2LDRH_POST:
  case ARM::t2LDRH_PRE:
  case ARM::t2LDRSBT:
  case ARM::t2LDRSB_POST:
  case ARM::t2LDRSB_PRE:
  case ARM::t2LDRSH_POST:
  case ARM::t2LDRSH_PRE:
  case ARM::t2LDRSHi12:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRSHpci:
  case ARM::t2LDRSHs:
    return 2;

  case ARM::t2LDRDi8: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(2).getReg();
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::t2STRB_POST:
  case ARM::t2STRB_PRE:
  case ARM::t2STRBs:
  case ARM::t2STRDi8:
  case ARM::t2STRH_POST:
  case ARM::t2STRH_PRE:
  case ARM::t2STRHs:
  case ARM::t2STR_POST:
  case ARM::t2STR_PRE:
  case ARM::t2STRs:
    return 2;
  }
}
3506
3507// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3508// can't be easily determined return 0 (missing MachineMemOperand).
3509//
3510// FIXME: The current MachineInstr design does not support relying on machine
3511// mem operands to determine the width of a memory access. Instead, we expect
3512// the target to provide this information based on the instruction opcode and
3513// operands. However, using MachineMemOperand is the best solution now for
3514// two reasons:
3515//
3516// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3517// operands. This is much more dangerous than using the MachineMemOperand
3518// sizes because CodeGen passes can insert/remove optional machine operands. In
3519// fact, it's totally incorrect for preRA passes and appears to be wrong for
3520// postRA passes as well.
3521//
3522// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3523// machine model that calls this should handle the unknown (zero size) case.
3524//
3525// Long term, we should require a target hook that verifies MachineMemOperand
3526// sizes during MC lowering. That target hook should be local to MC lowering
3527// because we can't ensure that it is aware of other MI forms. Doing this will
3528// ensure that MachineMemOperands are correctly propagated through all passes.
3530 unsigned Size = 0;
3531 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3532 E = MI.memoperands_end();
3533 I != E; ++I) {
3534 Size += (*I)->getSize().getValue();
3535 }
3536 // FIXME: The scheduler currently can't handle values larger than 16. But
3537 // the values can actually go up to 32 for floating-point load/store
3538 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3539 // operations isn't right; we could end up with "extra" memory operands for
3540 // various reasons, like tail merge merging two memory operations.
3541 return std::min(Size / 4, 16U);
3542}
3543
                                            unsigned NumRegs) {
  // Baseline: one micro-op for the address computation plus one per
  // transferred register; writeback and return forms add extras below.
  unsigned UOps = 1 + NumRegs; // 1 for address computation.
  switch (Opc) {
  default:
    break;
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    ++UOps; // One for base register writeback.
    break;
  case ARM::LDMIA_RET:
  case ARM::tPOP_RET:
  case ARM::t2LDMIA_RET:
    UOps += 2; // One for base reg wb, one for write to pc.
    break;
  }
  return UOps;
}
3582
                                          const MachineInstr &MI) const {
  // Without itinerary data, every instruction is assumed to be one uop.
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &Desc = MI.getDesc();
  unsigned Class = Desc.getSchedClass();
  int ItinUOps = ItinData->getNumMicroOps(Class);
  if (ItinUOps >= 0) {
    // Swift loads/stores get a more precise per-opcode count.
    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
      return getNumMicroOpsSwiftLdSt(ItinData, MI);

    return ItinUOps;
  }

  // Negative itinerary count: the uop count depends on the operands.
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;

  // The number of uOps for load / store multiple are determined by the number
  // registers.
  //
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  //
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD: {
    // Operands beyond the fixed ones form the register list.
    unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
    // Dispatch on the subtarget's load/store-multiple issue model.
    // NOTE(review): the case labels of this inner switch appear garbled in
    // this listing — verify against the authoritative source.
    switch (Subtarget.getLdStMultipleTiming()) {
      // Assume the worst.
      return NumRegs;
      if (NumRegs < 4)
        return 2;
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      unsigned UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++UOps;
      return UOps;
    }
      unsigned UOps = (NumRegs / 2);
      // If there are odd number of registers or if it's not 64-bit aligned,
      // then it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
          (*MI.memoperands_begin())->getAlign() < Align(8))
        ++UOps;
      return UOps;
    }
  }
  }
  llvm_unreachable("Didn't find the number of microops");
}
3695
3696std::optional<unsigned>
3697ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3698 const MCInstrDesc &DefMCID, unsigned DefClass,
3699 unsigned DefIdx, unsigned DefAlign) const {
3700 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3701 if (RegNo <= 0)
3702 // Def is the address writeback.
3703 return ItinData->getOperandCycle(DefClass, DefIdx);
3704
3705 unsigned DefCycle;
3706 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3707 // (regno / 2) + (regno % 2) + 1
3708 DefCycle = RegNo / 2 + 1;
3709 if (RegNo % 2)
3710 ++DefCycle;
3711 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3712 DefCycle = RegNo;
3713 bool isSLoad = false;
3714
3715 switch (DefMCID.getOpcode()) {
3716 default: break;
3717 case ARM::VLDMSIA:
3718 case ARM::VLDMSIA_UPD:
3719 case ARM::VLDMSDB_UPD:
3720 isSLoad = true;
3721 break;
3722 }
3723
3724 // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3725 // then it takes an extra cycle.
3726 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3727 ++DefCycle;
3728 } else {
3729 // Assume the worst.
3730 DefCycle = RegNo + 2;
3731 }
3732
3733 return DefCycle;
3734}
3735
3736std::optional<unsigned>
3737ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3738 const MCInstrDesc &DefMCID, unsigned DefClass,
3739 unsigned DefIdx, unsigned DefAlign) const {
3740 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3741 if (RegNo <= 0)
3742 // Def is the address writeback.
3743 return ItinData->getOperandCycle(DefClass, DefIdx);
3744
3745 unsigned DefCycle;
3746 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3747 // 4 registers would be issued: 1, 2, 1.
3748 // 5 registers would be issued: 1, 2, 2.
3749 DefCycle = RegNo / 2;
3750 if (DefCycle < 1)
3751 DefCycle = 1;
3752 // Result latency is issue cycle + 2: E2.
3753 DefCycle += 2;
3754 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3755 DefCycle = (RegNo / 2);
3756 // If there are odd number of registers or if it's not 64-bit aligned,
3757 // then it takes an extra AGU (Address Generation Unit) cycle.
3758 if ((RegNo % 2) || DefAlign < 8)
3759 ++DefCycle;
3760 // Result latency is AGU cycles + 2.
3761 DefCycle += 2;
3762 } else {
3763 // Assume the worst.
3764 DefCycle = RegNo + 2;
3765 }
3766
3767 return DefCycle;
3768}
3769
3770std::optional<unsigned>
3771ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3772 const MCInstrDesc &UseMCID, unsigned UseClass,
3773 unsigned UseIdx, unsigned UseAlign) const {
3774 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3775 if (RegNo <= 0)
3776 return ItinData->getOperandCycle(UseClass, UseIdx);
3777
3778 unsigned UseCycle;
3779 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3780 // (regno / 2) + (regno % 2) + 1
3781 UseCycle = RegNo / 2 + 1;
3782 if (RegNo % 2)
3783 ++UseCycle;
3784 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3785 UseCycle = RegNo;
3786 bool isSStore = false;
3787
3788 switch (UseMCID.getOpcode()) {
3789 default: break;
3790 case ARM::VSTMSIA:
3791 case ARM::VSTMSIA_UPD:
3792 case ARM::VSTMSDB_UPD:
3793 isSStore = true;
3794 break;
3795 }
3796
3797 // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3798 // then it takes an extra cycle.
3799 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3800 ++UseCycle;
3801 } else {
3802 // Assume the worst.
3803 UseCycle = RegNo + 2;
3804 }
3805
3806 return UseCycle;
3807}
3808
3809std::optional<unsigned>
3810ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3811 const MCInstrDesc &UseMCID, unsigned UseClass,
3812 unsigned UseIdx, unsigned UseAlign) const {
3813 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3814 if (RegNo <= 0)
3815 return ItinData->getOperandCycle(UseClass, UseIdx);
3816
3817 unsigned UseCycle;
3818 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3819 UseCycle = RegNo / 2;
3820 if (UseCycle < 2)
3821 UseCycle = 2;
3822 // Read in E3.
3823 UseCycle += 2;
3824 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3825 UseCycle = (RegNo / 2);
3826 // If there are odd number of registers or if it's not 64-bit aligned,
3827 // then it takes an extra AGU (Address Generation Unit) cycle.
3828 if ((RegNo % 2) || UseAlign < 8)
3829 ++UseCycle;
3830 } else {
3831 // Assume the worst.
3832 UseCycle = 1;
3833 }
3834 return UseCycle;
3835}
3836
std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
    const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
    unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
    unsigned UseIdx, unsigned UseAlign) const {
  unsigned DefClass = DefMCID.getSchedClass();
  unsigned UseClass = UseMCID.getSchedClass();

  // Fast path: both operands lie within the fixed (non-variadic) part of
  // their instructions, so the itinerary can answer directly.
  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);

  // This may be a def / use of a variable_ops instruction, the operand
  // latency might be determinable dynamically. Let the target try to
  // figure it out.
  std::optional<unsigned> DefCycle;
  bool LdmBypass = false; // def produced by a load-multiple (may forward)
  switch (DefMCID.getOpcode()) {
  default:
    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    break;

  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
    LdmBypass = true;
    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;
  }

  if (!DefCycle)
    // We can't seem to determine the result latency of the def, assume it's 2.
    DefCycle = 2;

  std::optional<unsigned> UseCycle;
  switch (UseMCID.getOpcode()) {
  default:
    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    break;

  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;

  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;
  }

  if (!UseCycle)
    // Assume it's read in the first stage.
    UseCycle = 1;

  // The reader starts after the value is long since available; no latency to
  // report.
  if (UseCycle > *DefCycle + 1)
    return std::nullopt;

  UseCycle = *DefCycle - *UseCycle + 1;
  if (UseCycle > 0u) {
    if (LdmBypass) {
      // It's a variable_ops instruction so we can't use DefIdx here. Just use
      // first def operand.
      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
                                          UseClass, UseIdx))
        UseCycle = *UseCycle - 1;
    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
                                               UseClass, UseIdx)) {
      // A forwarding path shaves one cycle off the computed latency.
      UseCycle = *UseCycle - 1;
    }
  }

  return UseCycle;
}
3949
                                          const MachineInstr *MI, unsigned Reg,
                                          unsigned &DefIdx, unsigned &Dist) {
  Dist = 0;

  // Walk backwards through the bundle looking for the instruction that
  // defines Reg; Dist counts how many instructions were stepped over.
  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
  assert(II->isInsideBundle() && "Empty bundle?");

  int Idx = -1;
  while (II->isInsideBundle()) {
    Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
    if (Idx != -1)
      break;
    --II;
    ++Dist;
  }

  assert(Idx != -1 && "Cannot find bundled definition!");
  DefIdx = Idx;
  return &*II;
}
3972
                                          const MachineInstr &MI, unsigned Reg,
                                          unsigned &UseIdx, unsigned &Dist) {
  Dist = 0;

  assert(II->isInsideBundle() && "Empty bundle?");
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();

  // FIXME: This doesn't properly handle multiple uses.
  // Walk forward through the bundle to the first instruction that uses Reg;
  // Dist counts the instructions stepped over (t2IT is not counted).
  int Idx = -1;
  while (II != E && II->isInsideBundle()) {
    Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
    if (Idx != -1)
      break;
    if (II->getOpcode() != ARM::t2IT)
      ++Dist;
    ++II;
  }

  if (Idx == -1) {
    // No use of Reg found inside the bundle.
    Dist = 0;
    return nullptr;
  }

  UseIdx = Idx;
  return &*II;
}
4001
4002 /// Return the number of cycles to add to (or subtract from) the static
4003 /// itinerary based on the def opcode and alignment. The caller will ensure that
4004 /// adjusted latency is at least one cycle.
4005 static int adjustDefLatency(const ARMSubtarget &Subtarget,
4006                             const MachineInstr &DefMI,
4007                             const MCInstrDesc &DefMCID, unsigned DefAlign) {
4008   int Adjust = 0;
  // Cheaper shifted-load addressing variants on A7/A8/A9-class cores.
4009   if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4010     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4011     // variants are one cycle cheaper.
4012     switch (DefMCID.getOpcode()) {
4013     default: break;
4014     case ARM::LDRrs:
4015     case ARM::LDRBrs: {
4016       unsigned ShOpVal = DefMI.getOperand(3).getImm();
4017       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4018       if (ShImm == 0 ||
4019           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4020         --Adjust;
4021       break;
4022     }
4023     case ARM::t2LDRs:
4024     case ARM::t2LDRBs:
4025     case ARM::t2LDRHs:
4026     case ARM::t2LDRSHs: {
4027       // Thumb2 mode: lsl only.
4028       unsigned ShAmt = DefMI.getOperand(3).getImm();
4029       if (ShAmt == 0 || ShAmt == 2)
4030         --Adjust;
4031       break;
4032     }
4033     }
4034   } else if (Subtarget.isSwift()) {
4035     // FIXME: Properly handle all of the latency adjustments for address
4036     // writeback.
4037     switch (DefMCID.getOpcode()) {
4038     default: break;
4039     case ARM::LDRrs:
4040     case ARM::LDRBrs: {
4041       unsigned ShOpVal = DefMI.getOperand(3).getImm();
4042       bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4043       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4044       if (!isSub &&
4045           (ShImm == 0 ||
4046            ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4047             ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4048         Adjust -= 2;
4049       else if (!isSub &&
4050                ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4051         --Adjust;
4052       break;
4053     }
4054     case ARM::t2LDRs:
4055     case ARM::t2LDRBs:
4056     case ARM::t2LDRHs:
4057     case ARM::t2LDRSHs: {
4058       // Thumb2 mode: lsl only.
4059       unsigned ShAmt = DefMI.getOperand(3).getImm();
4060       if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4061         Adjust -= 2;
4062       break;
4063     }
4064     }
4065   }
4066 
  // NEON VLDn penalty: sub-8-byte alignment costs one extra cycle on cores
  // that check VLDn access alignment.
4067   if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4068     switch (DefMCID.getOpcode()) {
4069     default: break;
4070     case ARM::VLD1q8:
4071     case ARM::VLD1q16:
4072     case ARM::VLD1q32:
4073     case ARM::VLD1q64:
4074     case ARM::VLD1q8wb_fixed:
4075     case ARM::VLD1q16wb_fixed:
4076     case ARM::VLD1q32wb_fixed:
4077     case ARM::VLD1q64wb_fixed:
4078     case ARM::VLD1q8wb_register:
4079     case ARM::VLD1q16wb_register:
4080     case ARM::VLD1q32wb_register:
4081     case ARM::VLD1q64wb_register:
4082     case ARM::VLD2d8:
4083     case ARM::VLD2d16:
4084     case ARM::VLD2d32:
4085     case ARM::VLD2q8:
4086     case ARM::VLD2q16:
4087     case ARM::VLD2q32:
4088     case ARM::VLD2d8wb_fixed:
4089     case ARM::VLD2d16wb_fixed:
4090     case ARM::VLD2d32wb_fixed:
4091     case ARM::VLD2q8wb_fixed:
4092     case ARM::VLD2q16wb_fixed:
4093     case ARM::VLD2q32wb_fixed:
4094     case ARM::VLD2d8wb_register:
4095     case ARM::VLD2d16wb_register:
4096     case ARM::VLD2d32wb_register:
4097     case ARM::VLD2q8wb_register:
4098     case ARM::VLD2q16wb_register:
4099     case ARM::VLD2q32wb_register:
4100     case ARM::VLD3d8:
4101     case ARM::VLD3d16:
4102     case ARM::VLD3d32:
4103     case ARM::VLD1d64T:
4104     case ARM::VLD3d8_UPD:
4105     case ARM::VLD3d16_UPD:
4106     case ARM::VLD3d32_UPD:
4107     case ARM::VLD1d64Twb_fixed:
4108     case ARM::VLD1d64Twb_register:
4109     case ARM::VLD3q8_UPD:
4110     case ARM::VLD3q16_UPD:
4111     case ARM::VLD3q32_UPD:
4112     case ARM::VLD4d8:
4113     case ARM::VLD4d16:
4114     case ARM::VLD4d32:
4115     case ARM::VLD1d64Q:
4116     case ARM::VLD4d8_UPD:
4117     case ARM::VLD4d16_UPD:
4118     case ARM::VLD4d32_UPD:
4119     case ARM::VLD1d64Qwb_fixed:
4120     case ARM::VLD1d64Qwb_register:
4121     case ARM::VLD4q8_UPD:
4122     case ARM::VLD4q16_UPD:
4123     case ARM::VLD4q32_UPD:
4124     case ARM::VLD1DUPq8:
4125     case ARM::VLD1DUPq16:
4126     case ARM::VLD1DUPq32:
4127     case ARM::VLD1DUPq8wb_fixed:
4128     case ARM::VLD1DUPq16wb_fixed:
4129     case ARM::VLD1DUPq32wb_fixed:
4130     case ARM::VLD1DUPq8wb_register:
4131     case ARM::VLD1DUPq16wb_register:
4132     case ARM::VLD1DUPq32wb_register:
4133     case ARM::VLD2DUPd8:
4134     case ARM::VLD2DUPd16:
4135     case ARM::VLD2DUPd32:
4136     case ARM::VLD2DUPd8wb_fixed:
4137     case ARM::VLD2DUPd16wb_fixed:
4138     case ARM::VLD2DUPd32wb_fixed:
4139     case ARM::VLD2DUPd8wb_register:
4140     case ARM::VLD2DUPd16wb_register:
4141     case ARM::VLD2DUPd32wb_register:
4142     case ARM::VLD4DUPd8:
4143     case ARM::VLD4DUPd16:
4144     case ARM::VLD4DUPd32:
4145     case ARM::VLD4DUPd8_UPD:
4146     case ARM::VLD4DUPd16_UPD:
4147     case ARM::VLD4DUPd32_UPD:
4148     case ARM::VLD1LNd8:
4149     case ARM::VLD1LNd16:
4150     case ARM::VLD1LNd32:
4151     case ARM::VLD1LNd8_UPD:
4152     case ARM::VLD1LNd16_UPD:
4153     case ARM::VLD1LNd32_UPD:
4154     case ARM::VLD2LNd8:
4155     case ARM::VLD2LNd16:
4156     case ARM::VLD2LNd32:
4157     case ARM::VLD2LNq16:
4158     case ARM::VLD2LNq32:
4159     case ARM::VLD2LNd8_UPD:
4160     case ARM::VLD2LNd16_UPD:
4161     case ARM::VLD2LNd32_UPD:
4162     case ARM::VLD2LNq16_UPD:
4163     case ARM::VLD2LNq32_UPD:
4164     case ARM::VLD4LNd8:
4165     case ARM::VLD4LNd16:
4166     case ARM::VLD4LNd32:
4167     case ARM::VLD4LNq16:
4168     case ARM::VLD4LNq32:
4169     case ARM::VLD4LNd8_UPD:
4170     case ARM::VLD4LNd16_UPD:
4171     case ARM::VLD4LNd32_UPD:
4172     case ARM::VLD4LNq16_UPD:
4173     case ARM::VLD4LNq32_UPD:
4174       // If the address is not 64-bit aligned, the latencies of these
4175       // instructions increases by one.
4176       ++Adjust;
4177       break;
4178     }
4179   }
4180   return Adjust;
4181 }
4182
// getOperandLatency (MachineInstr form) - Resolve bundled def/use instructions
// to the concrete bundled instruction, then delegate to getOperandLatencyImpl.
// Returns std::nullopt when no operand latency can be computed (caller may
// fall back to getInstrLatency).
// NOTE(review): the first line of this method's signature is missing from
// this extracted listing — confirm against upstream.
4184     const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4185     unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4186   // No operand latency. The caller may fall back to getInstrLatency.
4187   if (!ItinData || ItinData->isEmpty())
4188     return std::nullopt;
4189 
4190   const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4191   Register Reg = DefMO.getReg();
4192 
  // If the def is a bundle, find the actual defining instruction inside it;
  // DefIdx/DefAdj are updated to refer to that instruction.
4193   const MachineInstr *ResolvedDefMI = &DefMI;
4194   unsigned DefAdj = 0;
4195   if (DefMI.isBundle())
4196     ResolvedDefMI =
4197         getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4198   if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4199       ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4200     return 1;
4201   }
4202 
4203   const MachineInstr *ResolvedUseMI = &UseMI;
4204   unsigned UseAdj = 0;
4205   if (UseMI.isBundle()) {
4206     ResolvedUseMI =
4207         getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4208     if (!ResolvedUseMI)
4209       return std::nullopt;
4210   }
4211 
4212   return getOperandLatencyImpl(
4213       ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4214       Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4215 }
4216
4217std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4218 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4219 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4220 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4221 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4222 if (Reg == ARM::CPSR) {
4223 if (DefMI.getOpcode() == ARM::FMSTAT) {
4224 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4225 return Subtarget.isLikeA9() ? 1 : 20;
4226 }
4227
4228 // CPSR set and branch can be paired in the same cycle.
4229 if (UseMI.isBranch())
4230 return 0;
4231
4232 // Otherwise it takes the instruction latency (generally one).
4233 unsigned Latency = getInstrLatency(ItinData, DefMI);
4234
4235 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4236 // its uses. Instructions which are otherwise scheduled between them may
4237 // incur a code size penalty (not able to use the CPSR setting 16-bit
4238 // instructions).
4239 if (Latency > 0 && Subtarget.isThumb2()) {
4240 const MachineFunction *MF = DefMI.getParent()->getParent();
4241 if (MF->getFunction().hasOptSize())
4242 --Latency;
4243 }
4244 return Latency;
4245 }
4246
4247 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4248 return std::nullopt;
4249
4250 unsigned DefAlign = DefMI.hasOneMemOperand()
4251 ? (*DefMI.memoperands_begin())->getAlign().value()
4252 : 0;
4253 unsigned UseAlign = UseMI.hasOneMemOperand()
4254 ? (*UseMI.memoperands_begin())->getAlign().value()
4255 : 0;
4256
4257 // Get the itinerary's latency if possible, and handle variable_ops.
4258 std::optional<unsigned> Latency = getOperandLatency(
4259 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4260 // Unable to find operand latency. The caller may resort to getInstrLatency.
4261 if (!Latency)
4262 return std::nullopt;
4263
4264 // Adjust for IT block position.
4265 int Adj = DefAdj + UseAdj;
4266
4267 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4268 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4269 if (Adj >= 0 || (int)*Latency > -Adj) {
4270 return *Latency + Adj;
4271 }
4272 // Return the itinerary latency, which may be zero but not less than zero.
4273 return Latency;
4274}
4275
// getOperandLatency (SDNode form) - Pre-RA (SelectionDAG) operand latency:
// look up the itinerary latency and apply per-subtarget adjustments for
// shifted-load addressing variants and under-aligned NEON VLDn pseudos.
// NOTE(review): this extracted listing is missing the signature line carrying
// the function name and one condition line in the Swift LDRrs case (the
// getAM2ShiftOpc comparison) — confirm against upstream.
4276 std::optional<unsigned>
4278                                     SDNode *DefNode, unsigned DefIdx,
4279                                     SDNode *UseNode, unsigned UseIdx) const {
4280   if (!DefNode->isMachineOpcode())
4281     return 1;
4282 
4283   const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4284 
4285   if (isZeroCost(DefMCID.Opcode))
4286     return 0;
4287 
4288   if (!ItinData || ItinData->isEmpty())
4289     return DefMCID.mayLoad() ? 3 : 1;
4290 
  // Use without a machine opcode: clamp via the pre-ISel adjustment.
4291   if (!UseNode->isMachineOpcode()) {
4292     std::optional<unsigned> Latency =
4293         ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4294     int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4295     int Threshold = 1 + Adj;
4296     return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4297   }
4298 
4299   const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4300   auto *DefMN = cast<MachineSDNode>(DefNode);
4301   unsigned DefAlign = !DefMN->memoperands_empty()
4302                           ? (*DefMN->memoperands_begin())->getAlign().value()
4303                           : 0;
4304   auto *UseMN = cast<MachineSDNode>(UseNode);
4305   unsigned UseAlign = !UseMN->memoperands_empty()
4306                           ? (*UseMN->memoperands_begin())->getAlign().value()
4307                           : 0;
4308   std::optional<unsigned> Latency = getOperandLatency(
4309       ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4310   if (!Latency)
4311     return std::nullopt;
4312 
4313   if (Latency > 1U &&
4314       (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4315        Subtarget.isCortexA7())) {
4316     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4317     // variants are one cycle cheaper.
4318     switch (DefMCID.getOpcode()) {
4319     default: break;
4320     case ARM::LDRrs:
4321     case ARM::LDRBrs: {
4322       unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4323       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4324       if (ShImm == 0 ||
4325           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4326         Latency = *Latency - 1;
4327       break;
4328     }
4329     case ARM::t2LDRs:
4330     case ARM::t2LDRBs:
4331     case ARM::t2LDRHs:
4332     case ARM::t2LDRSHs: {
4333       // Thumb2 mode: lsl only.
4334       unsigned ShAmt = DefNode->getConstantOperandVal(2);
4335       if (ShAmt == 0 || ShAmt == 2)
4336         Latency = *Latency - 1;
4337       break;
4338     }
4339     }
4340   } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4341     // FIXME: Properly handle all of the latency adjustments for address
4342     // writeback.
4343     switch (DefMCID.getOpcode()) {
4344     default: break;
4345     case ARM::LDRrs:
4346     case ARM::LDRBrs: {
4347       unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4348       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4349       if (ShImm == 0 ||
4350           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4352         Latency = *Latency - 2;
4353       else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4354         Latency = *Latency - 1;
4355       break;
4356     }
4357     case ARM::t2LDRs:
4358     case ARM::t2LDRBs:
4359     case ARM::t2LDRHs:
4360     case ARM::t2LDRSHs:
4361       // Thumb2 mode: lsl 0-3 only.
4362       Latency = *Latency - 2;
4363       break;
4364     }
4365   }
4366 
  // NEON VLDn penalty: sub-8-byte alignment costs one extra cycle on cores
  // that check VLDn access alignment.
4367   if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4368     switch (DefMCID.getOpcode()) {
4369     default: break;
4370     case ARM::VLD1q8:
4371     case ARM::VLD1q16:
4372     case ARM::VLD1q32:
4373     case ARM::VLD1q64:
4374     case ARM::VLD1q8wb_register:
4375     case ARM::VLD1q16wb_register:
4376     case ARM::VLD1q32wb_register:
4377     case ARM::VLD1q64wb_register:
4378     case ARM::VLD1q8wb_fixed:
4379     case ARM::VLD1q16wb_fixed:
4380     case ARM::VLD1q32wb_fixed:
4381     case ARM::VLD1q64wb_fixed:
4382     case ARM::VLD2d8:
4383     case ARM::VLD2d16:
4384     case ARM::VLD2d32:
4385     case ARM::VLD2q8Pseudo:
4386     case ARM::VLD2q16Pseudo:
4387     case ARM::VLD2q32Pseudo:
4388     case ARM::VLD2d8wb_fixed:
4389     case ARM::VLD2d16wb_fixed:
4390     case ARM::VLD2d32wb_fixed:
4391     case ARM::VLD2q8PseudoWB_fixed:
4392     case ARM::VLD2q16PseudoWB_fixed:
4393     case ARM::VLD2q32PseudoWB_fixed:
4394     case ARM::VLD2d8wb_register:
4395     case ARM::VLD2d16wb_register:
4396     case ARM::VLD2d32wb_register:
4397     case ARM::VLD2q8PseudoWB_register:
4398     case ARM::VLD2q16PseudoWB_register:
4399     case ARM::VLD2q32PseudoWB_register:
4400     case ARM::VLD3d8Pseudo:
4401     case ARM::VLD3d16Pseudo:
4402     case ARM::VLD3d32Pseudo:
4403     case ARM::VLD1d8TPseudo:
4404     case ARM::VLD1d16TPseudo:
4405     case ARM::VLD1d32TPseudo:
4406     case ARM::VLD1d64TPseudo:
4407     case ARM::VLD1d64TPseudoWB_fixed:
4408     case ARM::VLD1d64TPseudoWB_register:
4409     case ARM::VLD3d8Pseudo_UPD:
4410     case ARM::VLD3d16Pseudo_UPD:
4411     case ARM::VLD3d32Pseudo_UPD:
4412     case ARM::VLD3q8Pseudo_UPD:
4413     case ARM::VLD3q16Pseudo_UPD:
4414     case ARM::VLD3q32Pseudo_UPD:
4415     case ARM::VLD3q8oddPseudo:
4416     case ARM::VLD3q16oddPseudo:
4417     case ARM::VLD3q32oddPseudo:
4418     case ARM::VLD3q8oddPseudo_UPD:
4419     case ARM::VLD3q16oddPseudo_UPD:
4420     case ARM::VLD3q32oddPseudo_UPD:
4421     case ARM::VLD4d8Pseudo:
4422     case ARM::VLD4d16Pseudo:
4423     case ARM::VLD4d32Pseudo:
4424     case ARM::VLD1d8QPseudo:
4425     case ARM::VLD1d16QPseudo:
4426     case ARM::VLD1d32QPseudo:
4427     case ARM::VLD1d64QPseudo:
4428     case ARM::VLD1d64QPseudoWB_fixed:
4429     case ARM::VLD1d64QPseudoWB_register:
4430     case ARM::VLD1q8HighQPseudo:
4431     case ARM::VLD1q8LowQPseudo_UPD:
4432     case ARM::VLD1q8HighTPseudo:
4433     case ARM::VLD1q8LowTPseudo_UPD:
4434     case ARM::VLD1q16HighQPseudo:
4435     case ARM::VLD1q16LowQPseudo_UPD:
4436     case ARM::VLD1q16HighTPseudo:
4437     case ARM::VLD1q16LowTPseudo_UPD:
4438     case ARM::VLD1q32HighQPseudo:
4439     case ARM::VLD1q32LowQPseudo_UPD:
4440     case ARM::VLD1q32HighTPseudo:
4441     case ARM::VLD1q32LowTPseudo_UPD:
4442     case ARM::VLD1q64HighQPseudo:
4443     case ARM::VLD1q64LowQPseudo_UPD:
4444     case ARM::VLD1q64HighTPseudo:
4445     case ARM::VLD1q64LowTPseudo_UPD:
4446     case ARM::VLD4d8Pseudo_UPD:
4447     case ARM::VLD4d16Pseudo_UPD:
4448     case ARM::VLD4d32Pseudo_UPD:
4449     case ARM::VLD4q8Pseudo_UPD:
4450     case ARM::VLD4q16Pseudo_UPD:
4451     case ARM::VLD4q32Pseudo_UPD:
4452     case ARM::VLD4q8oddPseudo:
4453     case ARM::VLD4q16oddPseudo:
4454     case ARM::VLD4q32oddPseudo:
4455     case ARM::VLD4q8oddPseudo_UPD:
4456     case ARM::VLD4q16oddPseudo_UPD:
4457     case ARM::VLD4q32oddPseudo_UPD:
4458     case ARM::VLD1DUPq8:
4459     case ARM::VLD1DUPq16:
4460     case ARM::VLD1DUPq32:
4461     case ARM::VLD1DUPq8wb_fixed:
4462     case ARM::VLD1DUPq16wb_fixed:
4463     case ARM::VLD1DUPq32wb_fixed:
4464     case ARM::VLD1DUPq8wb_register:
4465     case ARM::VLD1DUPq16wb_register:
4466     case ARM::VLD1DUPq32wb_register:
4467     case ARM::VLD2DUPd8:
4468     case ARM::VLD2DUPd16:
4469     case ARM::VLD2DUPd32:
4470     case ARM::VLD2DUPd8wb_fixed:
4471     case ARM::VLD2DUPd16wb_fixed:
4472     case ARM::VLD2DUPd32wb_fixed:
4473     case ARM::VLD2DUPd8wb_register:
4474     case ARM::VLD2DUPd16wb_register:
4475     case ARM::VLD2DUPd32wb_register:
4476     case ARM::VLD2DUPq8EvenPseudo:
4477     case ARM::VLD2DUPq8OddPseudo:
4478     case ARM::VLD2DUPq16EvenPseudo:
4479     case ARM::VLD2DUPq16OddPseudo:
4480     case ARM::VLD2DUPq32EvenPseudo:
4481     case ARM::VLD2DUPq32OddPseudo:
4482     case ARM::VLD3DUPq8EvenPseudo:
4483     case ARM::VLD3DUPq8OddPseudo:
4484     case ARM::VLD3DUPq16EvenPseudo:
4485     case ARM::VLD3DUPq16OddPseudo:
4486     case ARM::VLD3DUPq32EvenPseudo:
4487     case ARM::VLD3DUPq32OddPseudo:
4488     case ARM::VLD4DUPd8Pseudo:
4489     case ARM::VLD4DUPd16Pseudo:
4490     case ARM::VLD4DUPd32Pseudo:
4491     case ARM::VLD4DUPd8Pseudo_UPD:
4492     case ARM::VLD4DUPd16Pseudo_UPD:
4493     case ARM::VLD4DUPd32Pseudo_UPD:
4494     case ARM::VLD4DUPq8EvenPseudo:
4495     case ARM::VLD4DUPq8OddPseudo:
4496     case ARM::VLD4DUPq16EvenPseudo:
4497     case ARM::VLD4DUPq16OddPseudo:
4498     case ARM::VLD4DUPq32EvenPseudo:
4499     case ARM::VLD4DUPq32OddPseudo:
4500     case ARM::VLD1LNq8Pseudo:
4501     case ARM::VLD1LNq16Pseudo:
4502     case ARM::VLD1LNq32Pseudo:
4503     case ARM::VLD1LNq8Pseudo_UPD:
4504     case ARM::VLD1LNq16Pseudo_UPD:
4505     case ARM::VLD1LNq32Pseudo_UPD:
4506     case ARM::VLD2LNd8Pseudo:
4507     case ARM::VLD2LNd16Pseudo:
4508     case ARM::VLD2LNd32Pseudo:
4509     case ARM::VLD2LNq16Pseudo:
4510     case ARM::VLD2LNq32Pseudo:
4511     case ARM::VLD2LNd8Pseudo_UPD:
4512     case ARM::VLD2LNd16Pseudo_UPD:
4513     case ARM::VLD2LNd32Pseudo_UPD:
4514     case ARM::VLD2LNq16Pseudo_UPD:
4515     case ARM::VLD2LNq32Pseudo_UPD:
4516     case ARM::VLD4LNd8Pseudo:
4517     case ARM::VLD4LNd16Pseudo:
4518     case ARM::VLD4LNd32Pseudo:
4519     case ARM::VLD4LNq16Pseudo:
4520     case ARM::VLD4LNq32Pseudo:
4521     case ARM::VLD4LNd8Pseudo_UPD:
4522     case ARM::VLD4LNd16Pseudo_UPD:
4523     case ARM::VLD4LNd32Pseudo_UPD:
4524     case ARM::VLD4LNq16Pseudo_UPD:
4525     case ARM::VLD4LNq32Pseudo_UPD:
4526       // If the address is not 64-bit aligned, the latencies of these
4527       // instructions increases by one.
4528       Latency = *Latency + 1;
4529       break;
4530     }
4531 
4532   return Latency;
4533 }
4534
4535unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4536 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4537 MI.isImplicitDef())
4538 return 0;
4539
4540 if (MI.isBundle())
4541 return 0;
4542
4543 const MCInstrDesc &MCID = MI.getDesc();
4544
4545 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4546 !Subtarget.cheapPredicableCPSRDef())) {
4547 // When predicated, CPSR is an additional source operand for CPSR updating
4548 // instructions, this apparently increases their latencies.
4549 return 1;
4550 }
4551 return 0;
4552}
4553
// getInstrLatency (MachineInstr form) - Whole-instruction latency: copy-like
// pseudos cost 1, bundles sum their members, otherwise the itinerary's stage
// latency is used and adjusted for dynamic def-side opcode variants.
// NOTE(review): this extracted listing is missing the declaration line of the
// bundle iterator `I` — confirm against upstream.
4554 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4555                                            const MachineInstr &MI,
4556                                            unsigned *PredCost) const {
4557   if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4558       MI.isImplicitDef())
4559     return 1;
4560 
4561   // An instruction scheduler typically runs on unbundled instructions, however
4562   // other passes may query the latency of a bundled instruction.
4563   if (MI.isBundle()) {
4564     unsigned Latency = 0;
4566     MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4567     while (++I != E && I->isInsideBundle()) {
       // IT instructions are free; everything else contributes its latency.
4568       if (I->getOpcode() != ARM::t2IT)
4569         Latency += getInstrLatency(ItinData, *I, PredCost);
4570     }
4571     return Latency;
4572   }
4573 
4574   const MCInstrDesc &MCID = MI.getDesc();
4575   if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4576                                      !Subtarget.cheapPredicableCPSRDef()))) {
4577     // When predicated, CPSR is an additional source operand for CPSR updating
4578     // instructions, this apparently increases their latencies.
4579     *PredCost = 1;
4580   }
4581   // Be sure to call getStageLatency for an empty itinerary in case it has a
4582   // valid MinLatency property.
4583   if (!ItinData)
4584     return MI.mayLoad() ? 3 : 1;
4585 
4586   unsigned Class = MCID.getSchedClass();
4587 
4588   // For instructions with variable uops, use uops as latency.
4589   if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4590     return getNumMicroOps(ItinData, MI);
4591 
4592   // For the common case, fall back on the itinerary's latency.
4593   unsigned Latency = ItinData->getStageLatency(Class);
4594 
4595   // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4596   unsigned DefAlign =
4597       MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4598   int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4599   if (Adj >= 0 || (int)Latency > -Adj) {
4600     return Latency + Adj;
4601   }
4602   return Latency;
4603 }
4604
4605unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4606 SDNode *Node) const {
4607 if (!Node->isMachineOpcode())
4608 return 1;
4609
4610 if (!ItinData || ItinData->isEmpty())
4611 return 1;
4612
4613 unsigned Opcode = Node->getMachineOpcode();
4614 switch (Opcode) {
4615 default:
4616 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4617 case ARM::VLDMQIA:
4618 case ARM::VSTMQIA:
4619 return 2;
4620 }
4621}
4622
4623bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4624 const MachineRegisterInfo *MRI,
4625 const MachineInstr &DefMI,
4626 unsigned DefIdx,
4627 const MachineInstr &UseMI,
4628 unsigned UseIdx) const {
4629 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4630 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4631 if (Subtarget.nonpipelinedVFP() &&
4632 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4633 return true;
4634
4635 // Hoist VFP / NEON instructions with 4 or higher latency.
4636 unsigned Latency =
4637 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4638 if (Latency <= 3)
4639 return false;
4640 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4641 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4642}
4643
4644bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4645 const MachineInstr &DefMI,
4646 unsigned DefIdx) const {
4647 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4648 if (!ItinData || ItinData->isEmpty())
4649 return false;
4650
4651 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4652 if (DDomain == ARMII::DomainGeneral) {
4653 unsigned DefClass = DefMI.getDesc().getSchedClass();
4654 std::optional<unsigned> DefCycle =
4655 ItinData->getOperandCycle(DefClass, DefIdx);
4656 return DefCycle && DefCycle <= 2U;
4657 }
4658 return false;
4659}
4660
// verifyInstruction - Machine verifier hook: reject ARM/Thumb instructions
// that violate target-specific encoding constraints, setting ErrInfo to a
// diagnostic string and returning false on failure.
// NOTE(review): this extracted listing is missing the start of the AddrMode
// declaration and several AddrMode case labels before AddrModeT2_i12 —
// confirm against upstream.
4661 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4662                                          StringRef &ErrInfo) const {
4663   if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4664     ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4665     return false;
4666   }
4667   if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4668     // Make sure we don't generate a lo-lo mov that isn't supported.
4669     if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4670         !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4671       ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4672       return false;
4673     }
4674   }
  // Thumb1 push/pop may only name R0-R7 (plus LR for push, PC for pop-ret).
4675   if (MI.getOpcode() == ARM::tPUSH ||
4676       MI.getOpcode() == ARM::tPOP ||
4677       MI.getOpcode() == ARM::tPOP_RET) {
4678     for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4679       if (MO.isImplicit() || !MO.isReg())
4680         continue;
4681       Register Reg = MO.getReg();
4682       if (Reg < ARM::R0 || Reg > ARM::R7) {
4683         if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4684             !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4685           ErrInfo = "Unsupported register in Thumb1 push/pop";
4686           return false;
4687         }
4688       }
4689     }
4690   }
4691   if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4692     assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4693     if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4694         MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4695       ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4696       return false;
4697     }
4698   }
4699 
4700   // Check the address model by taking the first Imm operand and checking it is
4701   // legal for that addressing mode.
4703       (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4704   switch (AddrMode) {
4705   default:
4706     break;
4714   case ARMII::AddrModeT2_i12: {
4715     uint32_t Imm = 0;
       // The first immediate operand is taken to be the address offset.
4716     for (auto Op : MI.operands()) {
4717       if (Op.isImm()) {
4718         Imm = Op.getImm();
4719         break;
4720       }
4721     }
4722     if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4723       ErrInfo = "Incorrect AddrMode Imm for instruction";
4724       return false;
4725     }
4726     break;
4727   }
4728   }
4729   return true;
4730 }
4731
// expandLoadStackGuardBase - Expand the LOAD_STACK_GUARD pseudo into either a
// TLS-register read (MRC/t2MRC) plus offset load, or a global-address
// materialization (optionally via GOT indirection) plus load.
// NOTE(review): this extracted listing is missing the signature line, the
// `MIB` declaration, and several builder continuation lines (predicate
// operands, memory-operand flags/attachment) — confirm against upstream.
4733                                                unsigned LoadImmOpc,
4734                                                unsigned LoadOpc) const {
4735   assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4736          "ROPI/RWPI not currently supported with stack guard");
4737 
4738   MachineBasicBlock &MBB = *MI->getParent();
4739   DebugLoc DL = MI->getDebugLoc();
4740   Register Reg = MI->getOperand(0).getReg();
4742   unsigned int Offset = 0;
4743 
  // TLS-based guard: read the thread pointer via MRC p15, then load from a
  // module-specified offset.
4744   if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4745     assert(!Subtarget.isReadTPSoft() &&
4746            "TLS stack protector requires hardware TLS register");
4747 
4748     BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4749         .addImm(15)
4750         .addImm(0)
4751         .addImm(13)
4752         .addImm(0)
4753         .addImm(3)
4755 
4756     Module &M = *MBB.getParent()->getFunction().getParent();
4757     Offset = M.getStackProtectorGuardOffset();
4758     if (Offset & ~0xfffU) {
4759       // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4760       // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4761       // bits, resulting in a range of 0 to +1 MiB for the guard offset.
4762       unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4763       BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4764           .addReg(Reg, RegState::Kill)
4765           .addImm(Offset & ~0xfffU)
4767           .addReg(0);
4768       Offset &= 0xfffU;
4769     }
4770   } else {
    // Global-variable guard: materialize the symbol address, with flavor
    // flags chosen per object-file format.
4771     const GlobalValue *GV =
4772         cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4773     bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4774 
4775     unsigned TargetFlags = ARMII::MO_NO_FLAG;
4776     if (Subtarget.isTargetMachO()) {
4777       TargetFlags |= ARMII::MO_NONLAZY;
4778     } else if (Subtarget.isTargetCOFF()) {
4779       if (GV->hasDLLImportStorageClass())
4780         TargetFlags |= ARMII::MO_DLLIMPORT;
4781       else if (IsIndirect)
4782         TargetFlags |= ARMII::MO_COFFSTUB;
4783     } else if (IsIndirect) {
4784       TargetFlags |= ARMII::MO_GOT;
4785     }
4786 
4787     if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
       // tMOVi32imm clobbers flags, so preserve APSR around it.
4788       Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4789       auto APSREncoding =
4790           ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4791       BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
4792           .addImm(APSREncoding)
4794       BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4795           .addGlobalAddress(GV, 0, TargetFlags);
4796       BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
4797           .addImm(APSREncoding)
4798           .addReg(CPSRSaveReg, RegState::Kill)
4800     } else {
4801       BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4802           .addGlobalAddress(GV, 0, TargetFlags);
4803     }
4804 
4805     if (IsIndirect) {
       // One extra load through the GOT to resolve the real address.
4806       MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4807       MIB.addReg(Reg, RegState::Kill).addImm(0);
4808       auto Flags = MachineMemOperand::MOLoad |
4811       MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4812           MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4814     }
4815   }
4816 
  // Final load of the guard value itself, reusing the pseudo's memoperands.
4817   MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4818   MIB.addReg(Reg, RegState::Kill)
4819       .addImm(Offset)
4820       .cloneMemRefs(*MI)
4822 }
4823
4824bool
4825ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4826 unsigned &AddSubOpc,
4827 bool &NegAcc, bool &HasLane) const {
4828 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4829 if (I == MLxEntryMap.end())
4830 return false;
4831
4832 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4833 MulOpc = Entry.MulOpc;
4834 AddSubOpc = Entry.AddSubOpc;
4835 NegAcc = Entry.NegAcc;
4836 HasLane = Entry.HasLane;
4837 return true;
4838}
4839
4840//===----------------------------------------------------------------------===//
4841// Execution domains.
4842//===----------------------------------------------------------------------===//
4843//
4844// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4845// and some can go down both. The vmov instructions go down the VFP pipeline,
4846// but they can be changed to vorr equivalents that are executed by the NEON
4847// pipeline.
4848//
4849// We use the following execution domain numbering:
4850//
4856
4857//
4858// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4859//
// getExecutionDomain - Return (current domain, bitmask of domains this
// instruction could execute in). Only unpredicated VMOVD/VMOVRS/VMOVSR/VMOVS
// are swizzlable between VFP and NEON; everything else reports a single
// domain.
// NOTE(review): this extracted listing is missing the signature line carrying
// the function name and the two `if (Domain & ...)` condition lines guarding
// the NEON/VFP returns — confirm against upstream.
4860 std::pair<uint16_t, uint16_t>
4862   // If we don't have access to NEON instructions then we won't be able
4863   // to swizzle anything to the NEON domain. Check to make sure.
4864   if (Subtarget.hasNEON()) {
4865     // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4866     // if they are not predicated.
4867     if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4868       return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4869 
4870     // CortexA9 is particularly picky about mixing the two and wants these
4871     // converted.
4872     if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4873         (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4874          MI.getOpcode() == ARM::VMOVS))
4875       return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4876   }
4877   // No other instructions can be swizzled, so just determine their domain.
4878   unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4879 
4881     return std::make_pair(ExeNEON, 0);
4882 
4883   // Certain instructions can go either way on Cortex-A8.
4884   // Treat them as NEON instructions.
4885   if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4886     return std::make_pair(ExeNEON, 0);
4887 
4889     return std::make_pair(ExeVFP, 0);
4890 
4891   return std::make_pair(ExeGeneric, 0);
4892 }
4893
// getCorrespondingDRegAndLane - Map an S-register to its containing
// D-register, setting Lane to 0 if SReg is the low half (ssub_0) or 1 if it
// is the high half (ssub_1). Asserts if SReg has no D super-register.
// NOTE(review): the first line of this helper's signature is missing from
// this extracted listing — confirm against upstream.
4895                                               unsigned SReg, unsigned &Lane) {
4896   MCRegister DReg =
4897       TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4898   Lane = 0;
4899 
4900   if (DReg)
4901     return DReg;
4902 
  // Not the low half; it must be the high lane of some D-register.
4903   Lane = 1;
4904   DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4905 
4906   assert(DReg && "S-register with no D super-register?");
4907   return DReg;
4908 }
4909
4910 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4911 /// set ImplicitSReg to a register number that must be marked as implicit-use or
4912 /// zero if no register needs to be defined as implicit-use.
4913 ///
4914 /// If the function cannot determine if an SPR should be marked implicit use or
4915 /// not, it returns false.
4916 ///
4917 /// This function handles cases where an instruction is being modified from taking
4918 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4919 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4920 /// lane of the DPR).
4921 ///
4922 /// If the other SPR is defined, an implicit-use of it should be added. Else,
4923 /// (including the case where the DPR itself is defined), it should not.
4924 ///
// NOTE(review): this extracted listing is missing the first line of the
// signature and the declaration part of the liveness-query result (`LQR`) —
// confirm against upstream.
4926                                        MachineInstr &MI, MCRegister DReg,
4927                                        unsigned Lane,
4928                                        MCRegister &ImplicitSReg) {
4929   // If the DPR is defined or used already, the other SPR lane will be chained
4930   // correctly, so there is nothing to be done.
4931   if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4932     ImplicitSReg = MCRegister();
4933     return true;
4934   }
4935 
4936   // Otherwise we need to go searching to see if the SPR is set explicitly.
4937   ImplicitSReg = TRI->getSubReg(DReg,
4938                                 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4940       MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4941 
4942   if (LQR == MachineBasicBlock::LQR_Live)
4943     return true;
4944   else if (LQR == MachineBasicBlock::LQR_Unknown)
4945     return false;
4946 
4947   // If the register is known not to be live, there is no need to add an
4948   // implicit-use.
4949   ImplicitSReg = MCRegister();
4950   return true;
4951 }
4952
                                          unsigned Domain) const {
  unsigned DstReg, SrcReg;
  MCRegister DReg;
  unsigned Lane;
  // Builder bound to MI itself: addReg/addImm below append operands to MI
  // after it has been re-descriptor'd for the new opcode.
  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("cannot handle opcode!");
    break;
  case ARM::VMOVD:
    // D-reg copy: rewrite VFP VMOVD into NEON VORRd (dst = src | src).
    if (Domain != ExeNEON)
      break;

    // Zap the predicate operands.
    assert(!isPredicated(MI) && "Cannot predicate a VORRd");

    // Make sure we've got NEON instructions.
    assert(Subtarget.hasNEON() && "VORRd requires NEON");

    // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
    DstReg = MI.getOperand(0).getReg();
    SrcReg = MI.getOperand(1).getReg();

    // Drop all explicit operands; they are re-added for the new opcode.
    for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
      MI.removeOperand(i - 1);

    // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
    MI.setDesc(get(ARM::VORRd));
    MIB.addReg(DstReg, RegState::Define)
        .addReg(SrcReg)
        .addReg(SrcReg)
    break;
  case ARM::VMOVRS:
    // GPR <- SPR move: rewrite into a NEON lane extract.
    if (Domain != ExeNEON)
      break;
    assert(!isPredicated(MI) && "Cannot predicate a VGETLN");

    // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
    DstReg = MI.getOperand(0).getReg();
    SrcReg = MI.getOperand(1).getReg();

    for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
      MI.removeOperand(i - 1);

    DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);

    // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
    // Note that DSrc has been widened and the other lane may be undef, which
    // contaminates the entire register.
    MI.setDesc(get(ARM::VGETLNi32));
    MIB.addReg(DstReg, RegState::Define)
        .addReg(DReg, RegState::Undef)
        .addImm(Lane)

    // The old source should be an implicit use, otherwise we might think it
    // was dead before here.
    MIB.addReg(SrcReg, RegState::Implicit);
    break;
  case ARM::VMOVSR: {
    // SPR <- GPR move: rewrite into a NEON lane insert.
    if (Domain != ExeNEON)
      break;
    assert(!isPredicated(MI) && "Cannot predicate a VSETLN");

    // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
    DstReg = MI.getOperand(0).getReg();
    SrcReg = MI.getOperand(1).getReg();

    DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);

    // Give up if the liveness of the sibling S lane cannot be established.
    MCRegister ImplicitSReg;
    if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
      break;

    for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
      MI.removeOperand(i - 1);

    // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
    // Again DDst may be undefined at the beginning of this instruction.
    MI.setDesc(get(ARM::VSETLNi32));
    MIB.addReg(DReg, RegState::Define)
        .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
        .addReg(SrcReg)
        .addImm(Lane)

    // The narrower destination must be marked as set to keep previous chains
    // in place.
    if (ImplicitSReg)
      MIB.addReg(ImplicitSReg, RegState::Implicit);
    break;
  }
  case ARM::VMOVS: {
    if (Domain != ExeNEON)
      break;

    // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
    DstReg = MI.getOperand(0).getReg();
    SrcReg = MI.getOperand(1).getReg();

    unsigned DstLane = 0, SrcLane = 0;
    MCRegister DDst, DSrc;
    DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
    DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);

    MCRegister ImplicitSReg;
    if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
      break;

    for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
      MI.removeOperand(i - 1);

    if (DSrc == DDst) {
      // Same D register: a single lane duplicate does the job.
      // Destination can be:
      // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
      MI.setDesc(get(ARM::VDUPLN32d));
      MIB.addReg(DDst, RegState::Define)
          .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
          .addImm(SrcLane)

      // Neither the source or the destination are naturally represented any
      // more, so add them in manually.
      MIB.addReg(SrcReg, RegState::Implicit);
      if (ImplicitSReg)
        MIB.addReg(ImplicitSReg, RegState::Implicit);
      break;
    }

    // In general there's no single instruction that can perform an S <-> S
    // move in NEON space, but a pair of VEXT instructions *can* do the
    // job. It turns out that the VEXTs needed will only use DSrc once, with
    // the position based purely on the combination of lane-0 and lane-1
    // involved. For example
    // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
    // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
    // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
    // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
    //
    // Pattern of the MachineInstrs is:
    // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
    MachineInstrBuilder NewMIB;
    NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
                     DDst);

    // On the first instruction, both DSrc and DDst may be undef if present.
    // Specifically when the original instruction didn't have them as an
    // <imp-use>.
    MCRegister CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
    bool CurUndef = !MI.readsRegister(CurReg, TRI);
    NewMIB.addReg(CurReg, getUndefRegState(CurUndef));

    CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
    CurUndef = !MI.readsRegister(CurReg, TRI);
    NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
        .addImm(1)

    if (SrcLane == DstLane)
      NewMIB.addReg(SrcReg, RegState::Implicit);

    // MI itself becomes the second VEXT of the pair.
    MI.setDesc(get(ARM::VEXTd32));
    MIB.addReg(DDst, RegState::Define);

    // On the second instruction, DDst has definitely been defined above, so
    // it is not undef. DSrc, if present, can be undef as above.
    CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
    CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    MIB.addReg(CurReg, getUndefRegState(CurUndef));

    CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
    CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    MIB.addReg(CurReg, getUndefRegState(CurUndef))
        .addImm(1)

    if (SrcLane != DstLane)
      MIB.addReg(SrcReg, RegState::Implicit);

    // As before, the original destination is no longer represented, add it
    // implicitly.
    if (ImplicitSReg != 0)
      MIB.addReg(ImplicitSReg, RegState::Implicit);
    break;
  }
  }
}
5146
5147//===----------------------------------------------------------------------===//
5148// Partial register updates
5149//===----------------------------------------------------------------------===//
5150//
5151// Swift renames NEON registers with 64-bit granularity. That means any
5152// instruction writing an S-reg implicitly reads the containing D-reg. The
5153// problem is mostly avoided by translating f32 operations to v2f32 operations
5154// on D-registers, but f32 loads are still a problem.
5155//
5156// These instructions can load an f32 into a NEON register:
5157//
5158// VLDRS - Only writes S, partial D update.
5159// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5160// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5161//
5162// FCONSTD can be used as a dependency-breaking instruction.
    const MachineInstr &MI, unsigned OpNum,
    const TargetRegisterInfo *TRI) const {
  // Zero clearance means the subtarget does not suffer from partial updates.
  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
  if (!PartialUpdateClearance)
    return 0;

  assert(TRI && "Need TRI instance");

  const MachineOperand &MO = MI.getOperand(OpNum);
  if (MO.readsReg())
    return 0;
  Register Reg = MO.getReg();
  int UseOp = -1;

  // Only a handful of opcodes create the problematic partial S-reg write.
  switch (MI.getOpcode()) {
  // Normal instructions writing only an S-register.
  case ARM::VLDRS:
  case ARM::FCONSTS:
  case ARM::VMOVSR:
  case ARM::VMOVv8i8:
  case ARM::VMOVv4i16:
  case ARM::VMOVv2i32:
  case ARM::VMOVv2f32:
  case ARM::VMOVv1i64:
    UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
    break;

  // Explicitly reads the dependency.
  case ARM::VLD1LNd32:
    UseOp = 3;
    break;
  default:
    return 0;
  }

  // If this instruction actually reads a value from Reg, there is no unwanted
  // dependency.
  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
    return 0;

  // We must be able to clobber the whole D-reg.
  if (Reg.isVirtual()) {
    // Virtual register must be a def undef foo:ssub_0 operand.
    if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
      return 0;
  } else if (ARM::SPRRegClass.contains(Reg)) {
    // Physical register: MI must define the full D-reg.
    MCRegister DReg =
        TRI->getMatchingSuperReg(Reg, ARM::ssub_0, &ARM::DPRRegClass);
    if (!DReg || !MI.definesRegister(DReg, TRI))
      return 0;
  }

  // MI has an unwanted D-register dependency.
  // Avoid defs in the previous N instructions.
  return PartialUpdateClearance;
}
5221
// Break a partial register dependency after getPartialRegUpdateClearance
// returned non-zero.
    MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
  assert(TRI && "Need TRI instance");

  const MachineOperand &MO = MI.getOperand(OpNum);
  Register Reg = MO.getReg();
  assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
  unsigned DReg = Reg;

  // If MI defines an S-reg, find the corresponding D super-register.
  if (ARM::SPRRegClass.contains(Reg)) {
    // S2k/S2k+1 both live in D-reg Dk; the register enums are laid out so
    // this arithmetic maps S to its containing D.
    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
  }

  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");

  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
  // the full D-register by loading the same value to both lanes. The
  // instruction is micro-coded with 2 uops, so don't do this until we can
  // properly schedule micro-coded instructions. The dispatcher stalls cause
  // too big regressions.

  // Insert the dependency-breaking FCONSTD before MI.
  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
      .addImm(96)
  MI.addRegisterKilled(DReg, TRI, true);
}
5256
  // NOP availability is gated on the HasV6KOps subtarget feature.
  return Subtarget.hasFeature(ARM::HasV6KOps);
}
5260
  // No shift-operand present: nothing to penalize, treat as fast.
  if (MI->getNumOperands() < 4)
    return true;
  unsigned ShOpVal = MI->getOperand(3).getImm();
  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
      ((ShImm == 1 || ShImm == 2) &&
       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
    return true;

  return false;
}
5274
    const MachineInstr &MI, unsigned DefIdx,
    SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");

  switch (MI.getOpcode()) {
  case ARM::VMOVDRR:
    // dX = VMOVDRR rY, rZ
    // is the same as:
    // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
    // Populate the InputRegs accordingly.
    // Undef inputs are skipped: they contribute no value to the sequence.
    // rY
    const MachineOperand *MOReg = &MI.getOperand(1);
    if (!MOReg->isUndef())
      InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
                                          MOReg->getSubReg(), ARM::ssub_0));
    // rZ
    MOReg = &MI.getOperand(2);
    if (!MOReg->isUndef())
      InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
                                          MOReg->getSubReg(), ARM::ssub_1));
    return true;
  }
  llvm_unreachable("Target dependent opcode missing");
}
5301
    const MachineInstr &MI, unsigned DefIdx,
    RegSubRegPairAndIdx &InputReg) const {
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");

  switch (MI.getOpcode()) {
  case ARM::VMOVRRD:
    // rX, rY = VMOVRRD dZ
    // is the same as:
    // rX = EXTRACT_SUBREG dZ, ssub_0
    // rY = EXTRACT_SUBREG dZ, ssub_1
    const MachineOperand &MOReg = MI.getOperand(2);
    // No value to extract from an undef source.
    if (MOReg.isUndef())
      return false;
    InputReg.Reg = MOReg.getReg();
    InputReg.SubReg = MOReg.getSubReg();
    // First def extracts the low lane, second def the high lane.
    InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
    return true;
  }
  llvm_unreachable("Target dependent opcode missing");
}
5324
    const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
    RegSubRegPairAndIdx &InsertedReg) const {
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");

  switch (MI.getOpcode()) {
  case ARM::VSETLNi32:
  case ARM::MVE_VMOV_to_lane_32:
    // dX = VSETLNi32 dY, rZ, imm
    // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
    const MachineOperand &MOBaseReg = MI.getOperand(1);
    const MachineOperand &MOInsertedReg = MI.getOperand(2);
    // Nothing meaningful is inserted when the inserted value is undef.
    if (MOInsertedReg.isUndef())
      return false;
    const MachineOperand &MOIndex = MI.getOperand(3);
    BaseReg.Reg = MOBaseReg.getReg();
    BaseReg.SubReg = MOBaseReg.getSubReg();

    InsertedReg.Reg = MOInsertedReg.getReg();
    InsertedReg.SubReg = MOInsertedReg.getSubReg();
    // The lane immediate selects which ssub_N index receives the value.
    InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
    return true;
  }
  llvm_unreachable("Target dependent opcode missing");
}
5351
std::pair<unsigned, unsigned>
  // Split TF into (direct flag bits, bitmask flag bits) using the
  // option mask defined by ARMII.
  const unsigned Mask = ARMII::MO_OPTION_MASK;
  return std::make_pair(TF & Mask, TF & ~Mask);
}
5357
  using namespace ARMII;

  // Stable flag -> name mapping used when (de)serializing MIR.
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
      {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
      {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
  };
  return ArrayRef(TargetFlags);
}
5369
  using namespace ARMII;

  // Stable flag -> name mapping for bitmask-style operand target flags,
  // used when (de)serializing MIR.
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_COFFSTUB, "arm-coffstub"},
      {MO_GOT, "arm-got"},
      {MO_SBREL, "arm-sbrel"},
      {MO_DLLIMPORT, "arm-dllimport"},
      {MO_SECREL, "arm-secrel"},
      {MO_NONLAZY, "arm-nonlazy"}};
  return ArrayRef(TargetFlags);
}
5383
std::optional<RegImmPair>
  int Sign = 1;
  unsigned Opcode = MI.getOpcode();
  int64_t Offset = 0;

  // TODO: Handle cases where Reg is a super- or sub-register of the
  // destination register.
  const MachineOperand &Op0 = MI.getOperand(0);
  if (!Op0.isReg() || Reg != Op0.getReg())
    return std::nullopt;

  // We describe SUBri or ADDri instructions.
  if (Opcode == ARM::SUBri)
    Sign = -1;
  else if (Opcode != ARM::ADDri)
    return std::nullopt;

  // TODO: Third operand can be global address (usually some string). Since
  //       strings can be relocated we cannot calculate their offsets for
  //       now.
  if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
    return std::nullopt;

  // Result: base register plus the signed immediate offset.
  Offset = MI.getOperand(2).getImm() * Sign;
  return RegImmPair{MI.getOperand(1).getReg(), Offset};
}
5411
                                   const TargetRegisterInfo *TRI) {
  // Scan the half-open range [From, To) for any write to Reg.
  for (auto I = From; I != To; ++I)
    if (I->modifiesRegister(Reg, TRI))
      return true;
  return false;
}
5421
                                         const TargetRegisterInfo *TRI) {
  // Search backwards to the instruction that defines CPSR. This may or may
  // not be a CMP, we check that after this loop. If we find another
  // instruction that reads CPSR, we return nullptr.
  MachineBasicBlock::iterator CmpMI = Br;
  while (CmpMI != Br->getParent()->begin()) {
    --CmpMI;
    if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
      break;
    if (CmpMI->readsRegister(ARM::CPSR, TRI))
      break;
  }

  // Check that this inst is a CMP r[0-7], #0 and that the register
  // is not redefined between the cmp and the br.
  if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
    return nullptr;
  Register Reg = CmpMI->getOperand(0).getReg();
  Register PredReg;
  // The compare itself must be unpredicated and compare against zero.
  ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
  if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
    return nullptr;
  // CBZ/CBNZ can only encode low registers (r0-r7).
  if (!isARMLowRegister(Reg))
    return nullptr;
  if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
    return nullptr;

  return &*CmpMI;
}
5452
                                           const ARMSubtarget *Subtarget,
                                           bool ForCodesize) {
  // Cost is in bytes when ForCodesize is set, otherwise in instruction count.
  if (Subtarget->isThumb()) {
    if (Val <= 255) // MOV
      return ForCodesize ? 2 : 1;
    if (Subtarget->hasV6T2Ops() && (Val <= 0xffff ||                    // MOV
                                    ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
                                    ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
      return ForCodesize ? 4 : 1;
    if (Val <= 510) // MOV + ADDi8
      return ForCodesize ? 4 : 2;
    if (~Val <= 255) // MOV + MVN
      return ForCodesize ? 4 : 2;
    if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
      return ForCodesize ? 4 : 2;
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) // MOV
      return ForCodesize ? 4 : 1;
    if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
      return ForCodesize ? 4 : 1;
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
      return ForCodesize ? 4 : 1;
    if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
      return ForCodesize ? 8 : 2;
    if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
      return ForCodesize ? 8 : 2;
  }
  // Fallbacks when no short encoding matched above.
  if (Subtarget->useMovt()) // MOVW + MOVT
    return ForCodesize ? 8 : 2;
  return ForCodesize ? 8 : 3; // Literal pool load
}
5485
5486bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5487 const ARMSubtarget *Subtarget,
5488 bool ForCodesize) {
5489 // Check with ForCodesize
5490 unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
5491 unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
5492 if (Cost1 < Cost2)
5493 return true;
5494 if (Cost1 > Cost2)
5495 return false;
5496
5497 // If they are equal, try with !ForCodesize
5498 return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
5499 ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
5500}
5501
5502/// Constants defining how certain sequences should be outlined.
5503/// This encompasses how an outlined function should be called, and what kind of
5504/// frame should be emitted for that outlined function.
5505///
5506/// \p MachineOutlinerTailCall implies that the function is being created from
5507/// a sequence of instructions ending in a return.
5508///
5509/// That is,
5510///
5511/// I1 OUTLINED_FUNCTION:
5512/// I2 --> B OUTLINED_FUNCTION I1
5513/// BX LR I2
5514/// BX LR
5515///
5516/// +-------------------------+--------+-----+
5517/// | | Thumb2 | ARM |
5518/// +-------------------------+--------+-----+
5519/// | Call overhead in Bytes | 4 | 4 |
5520/// | Frame overhead in Bytes | 0 | 0 |
5521/// | Stack fixup required | No | No |
5522/// +-------------------------+--------+-----+
5523///
5524/// \p MachineOutlinerThunk implies that the function is being created from
5525/// a sequence of instructions ending in a call. The outlined function is
5526/// called with a BL instruction, and the outlined function tail-calls the
5527/// original call destination.
5528///
5529/// That is,
5530///
5531/// I1 OUTLINED_FUNCTION:
5532/// I2 --> BL OUTLINED_FUNCTION I1
5533/// BL f I2
5534/// B f
5535///
5536/// +-------------------------+--------+-----+
5537/// | | Thumb2 | ARM |
5538/// +-------------------------+--------+-----+
5539/// | Call overhead in Bytes | 4 | 4 |
5540/// | Frame overhead in Bytes | 0 | 0 |
5541/// | Stack fixup required | No | No |
5542/// +-------------------------+--------+-----+
5543///
5544/// \p MachineOutlinerNoLRSave implies that the function should be called using
5545/// a BL instruction, but doesn't require LR to be saved and restored. This
5546/// happens when LR is known to be dead.
5547///
5548/// That is,
5549///
5550/// I1 OUTLINED_FUNCTION:
5551/// I2 --> BL OUTLINED_FUNCTION I1
5552/// I3 I2
5553/// I3
5554/// BX LR
5555///
5556/// +-------------------------+--------+-----+
5557/// | | Thumb2 | ARM |
5558/// +-------------------------+--------+-----+
5559/// | Call overhead in Bytes | 4 | 4 |
5560/// | Frame overhead in Bytes | 2 | 4 |
5561/// | Stack fixup required | No | No |
5562/// +-------------------------+--------+-----+
5563///
5564/// \p MachineOutlinerRegSave implies that the function should be called with a
5565/// save and restore of LR to an available register. This allows us to avoid
5566/// stack fixups. Note that this outlining variant is compatible with the
5567/// NoLRSave case.
5568///
5569/// That is,
5570///
5571/// I1 Save LR OUTLINED_FUNCTION:
5572/// I2 --> BL OUTLINED_FUNCTION I1
5573/// I3 Restore LR I2
5574/// I3
5575/// BX LR
5576///
5577/// +-------------------------+--------+-----+
5578/// | | Thumb2 | ARM |
5579/// +-------------------------+--------+-----+
5580/// | Call overhead in Bytes | 8 | 12 |
5581/// | Frame overhead in Bytes | 2 | 4 |
5582/// | Stack fixup required | No | No |
5583/// +-------------------------+--------+-----+
5584///
5585/// \p MachineOutlinerDefault implies that the function should be called with
5586/// a save and restore of LR to the stack.
5587///
5588/// That is,
5589///
5590/// I1 Save LR OUTLINED_FUNCTION:
5591/// I2 --> BL OUTLINED_FUNCTION I1
5592/// I3 Restore LR I2
5593/// I3
5594/// BX LR
5595///
5596/// +-------------------------+--------+-----+
5597/// | | Thumb2 | ARM |
5598/// +-------------------------+--------+-----+
5599/// | Call overhead in Bytes | 8 | 12 |
5600/// | Frame overhead in Bytes | 2 | 4 |
5601/// | Stack fixup required | Yes | Yes |
5602/// +-------------------------+--------+-----+
5603
5611
5617
5630
    // Per-variant byte costs, Thumb value vs. ARM value; these mirror the
    // call/frame overhead tables in the big comment block above.
    : CallTailCall(target.isThumb() ? 4 : 4),
      FrameTailCall(target.isThumb() ? 0 : 0),
      CallThunk(target.isThumb() ? 4 : 4),
      FrameThunk(target.isThumb() ? 0 : 0),
      CallNoLRSave(target.isThumb() ? 4 : 4),
      FrameNoLRSave(target.isThumb() ? 2 : 4),
      CallRegSave(target.isThumb() ? 8 : 12),
      FrameRegSave(target.isThumb() ? 2 : 4),
      CallDefault(target.isThumb() ? 8 : 12),
      FrameDefault(target.isThumb() ? 2 : 4),
      SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
};
5644
ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
  MachineFunction *MF = C.getMF();
  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
  const ARMBaseRegisterInfo *ARI =
      static_cast<const ARMBaseRegisterInfo *>(&TRI);

  BitVector regsReserved = ARI->getReservedRegs(*MF);
  // Check if there is an available register across the sequence that we can
  // use.
  for (Register Reg : ARM::rGPRRegClass) {
    if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
        Reg != ARM::LR && // LR is not reserved, but don't use it.
        Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
        C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
        C.isAvailableInsideSeq(Reg, TRI))
      return Reg;
  }
  // No suitable scratch register: caller must fall back to another scheme.
  return Register();
}
5665
// Compute liveness of LR at the point after the interval [I, E), which
// denotes a *backward* iteration through instructions. Used only for return
// basic blocks, which do not end with a tail call.
  // At the end of the function LR is dead.
  bool Live = false;
  for (; I != E; ++I) {
    const MachineInstr &MI = *I;

    // Check defs of LR.
    if (MI.modifiesRegister(ARM::LR, &TRI))
      Live = false;

    // Check uses of LR.
    unsigned Opcode = MI.getOpcode();
    if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
        Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
        Opcode == ARM::tBXNS_RET || Opcode == ARM::t2BXAUT_RET) {
      // These instructions use LR, but it's not an (explicit or implicit)
      // operand.
      Live = true;
      continue;
    }
    if (MI.readsRegister(ARM::LR, &TRI))
      Live = true;
  }
  // "Available" means LR is NOT live at this point.
  return !Live;
}
5696
std::optional<std::unique_ptr<outliner::OutlinedFunction>>
                                  const MachineModuleInfo &MMI,
    std::vector<outliner::Candidate> &RepeatedSequenceLocs,
    unsigned MinRepeats) const {
  // All candidates are the same instruction sequence, so sizing the first
  // one is enough.
  unsigned SequenceSize = 0;
  for (auto &MI : RepeatedSequenceLocs[0])
    SequenceSize += getInstSizeInBytes(MI);

  // Properties about candidate MBBs that hold for all of them.
  unsigned FlagsSetInAll = 0xF;

  // Compute liveness information for each candidate, and set FlagsSetInAll.
  for (outliner::Candidate &C : RepeatedSequenceLocs)
    FlagsSetInAll &= C.Flags;

  // According to the ARM Procedure Call Standard, the following are
  // undefined on entry/exit from a function call:
  //
  // * Register R12(IP),
  // * Condition codes (and thus the CPSR register)
  //
  // Since we control the instructions which are part of the outlined regions
  // we don't need to be fully compliant with the AAPCS, but we have to
  // guarantee that if a veneer is inserted at link time the code is still
  // correct. Because of this, we can't outline any sequence of instructions
  // where one of these registers is live into/across it. Thus, we need to
  // delete those candidates.
  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
    // If the unsafe registers in this block are all dead, then we don't need
    // to compute liveness here.
    if (C.Flags & UnsafeRegsDead)
      return false;
    return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
  };

  // Are there any candidates where those registers are live?
  if (!(FlagsSetInAll & UnsafeRegsDead)) {
    // Erase every candidate that violates the restrictions above. (It could be
    // true that we have viable candidates, so it's not worth bailing out in
    // the case that, say, 1 out of 20 candidates violate the restrictions.)
    llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);

    // If the sequence doesn't have enough candidates left, then we're done.
    if (RepeatedSequenceLocs.size() < MinRepeats)
      return std::nullopt;
  }

  // We expect the majority of the outlining candidates to be in consensus with
  // regard to return address sign and authentication, and branch target
  // enforcement, in other words, partitioning according to all the four
  // possible combinations of PAC-RET and BTI is going to yield one big subset
  // and three small (likely empty) subsets. That allows us to cull incompatible
  // candidates separately for PAC-RET and BTI.

  // Partition the candidates in two sets: one with BTI enabled and one with BTI
  // disabled. Remove the candidates from the smaller set. If they are the same
  // number prefer the non-BTI ones for outlining, since they have less
  // overhead.
  auto NoBTI =
      llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
        const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
        return AFI.branchTargetEnforcement();
      });
  if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
      std::distance(NoBTI, RepeatedSequenceLocs.end()))
    RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
  else
    RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);

  if (RepeatedSequenceLocs.size() < MinRepeats)
    return std::nullopt;

  // Likewise, partition the candidates according to PAC-RET enablement.
  auto NoPAC =
      llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
        const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
        // If the function happens to not spill the LR, do not disqualify it
        // from the outlining.
        return AFI.shouldSignReturnAddress(true);
      });
  if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
      std::distance(NoPAC, RepeatedSequenceLocs.end()))
    RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
  else
    RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);

  if (RepeatedSequenceLocs.size() < MinRepeats)
    return std::nullopt;

  // At this point, we have only "safe" candidates to outline. Figure out
  // frame + call instruction information.

  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();

  // Helper lambda which sets call information for every candidate.
  auto SetCandidateCallInfo =
      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
        for (outliner::Candidate &C : RepeatedSequenceLocs)
          C.setCallInfo(CallID, NumBytesForCall);
      };

  OutlinerCosts Costs(Subtarget);

  const auto &SomeMFI =
      *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
  // Adjust costs to account for the BTI instructions.
  if (SomeMFI.branchTargetEnforcement()) {
    Costs.FrameDefault += 4;
    Costs.FrameNoLRSave += 4;
    Costs.FrameRegSave += 4;
    Costs.FrameTailCall += 4;
    Costs.FrameThunk += 4;
  }

  // Adjust costs to account for sign and authentication instructions.
  if (SomeMFI.shouldSignReturnAddress(true)) {
    Costs.CallDefault += 8;          // +PAC instr, +AUT instr
    Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
  }

  unsigned FrameID = MachineOutlinerDefault;
  unsigned NumBytesToCreateFrame = Costs.FrameDefault;

  // If the last instruction in any candidate is a terminator, then we should
  // tail call all of the candidates.
  if (RepeatedSequenceLocs[0].back().isTerminator()) {
    FrameID = MachineOutlinerTailCall;
    NumBytesToCreateFrame = Costs.FrameTailCall;
    SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
  } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
             LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
             LastInstrOpcode == ARM::tBLXr ||
             LastInstrOpcode == ARM::tBLXr_noip ||
             LastInstrOpcode == ARM::tBLXi) {
    FrameID = MachineOutlinerThunk;
    NumBytesToCreateFrame = Costs.FrameThunk;
    SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
  } else {
    // We need to decide how to emit calls + frames. We can always emit the same
    // frame if we don't need to save to the stack. If we have to save to the
    // stack, then we need a different frame.
    unsigned NumBytesNoStackCalls = 0;
    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;

    for (outliner::Candidate &C : RepeatedSequenceLocs) {
      // LR liveness is overestimated in return blocks, unless they end with a
      // tail call.
      const auto Last = C.getMBB()->rbegin();
      const bool LRIsAvailable =
          C.getMBB()->isReturnBlock() && !Last->isCall()
              : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
      if (LRIsAvailable) {
        FrameID = MachineOutlinerNoLRSave;
        NumBytesNoStackCalls += Costs.CallNoLRSave;
        C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is an unused register available? If so, we won't modify the stack, so
      // we can outline with the same frame type as those that don't save LR.
      else if (findRegisterToSaveLRTo(C)) {
        FrameID = MachineOutlinerRegSave;
        NumBytesNoStackCalls += Costs.CallRegSave;
        C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is SP used in the sequence at all? If not, we don't have to modify
      // the stack, so we are guaranteed to get the same frame.
      else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
        NumBytesNoStackCalls += Costs.CallDefault;
        C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // If we outline this, we need to modify the stack. Pretend we don't
      // outline this by saving all of its bytes.
      else
        NumBytesNoStackCalls += SequenceSize;
    }

    // If there are no places where we have to save LR, then note that we don't
    // have to update the stack. Otherwise, give every candidate the default
    // call type
    if (NumBytesNoStackCalls <=
        RepeatedSequenceLocs.size() * Costs.CallDefault) {
      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
      FrameID = MachineOutlinerNoLRSave;
      if (RepeatedSequenceLocs.size() < MinRepeats)
        return std::nullopt;
    } else
      SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
  }

  // Does every candidate's MBB contain a call? If so, then we might have a
  // call in the range.
  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
    // check if the range contains a call. These require a save + restore of
    // the link register.
    outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
    if (any_of(drop_end(FirstCand),
               [](const MachineInstr &MI) { return MI.isCall(); }))
      NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;

    // Handle the last instruction separately. If it is tail call, then the
    // last instruction is a call, we don't want to save + restore in this
    // case. However, it could be possible that the last instruction is a
    // call without it being valid to tail call this sequence. We should
    // consider this as well.
    else if (FrameID != MachineOutlinerThunk &&
             FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
      NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
  }

  return std::make_unique<outliner::OutlinedFunction>(
      RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
}
5918
/// Check whether the SP-relative immediate offset of \p MI can absorb an
/// additional \p Fixup bytes (the stack adjustment introduced when the
/// outliner saves LR), and if \p Updt is true rewrite the immediate in place.
/// Returns true when MI either has no SP operand or its adjusted offset still
/// encodes; returns false when SP is not the base register, the addressing
/// mode carries no immediate offset, the offset is negative/subtracting, or
/// the adjusted offset no longer fits the immediate field.
/// NOTE(review): several `case` labels of the switch below are not visible in
/// this rendering (scrape artifact) — verify against upstream before editing.
5919bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
 5920 int64_t Fixup,
 5921 bool Updt) const {
 5922 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
 5923 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
 5924 if (SPIdx < 0)
 5925 // No SP operand
 5926 return true;
 5927 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
 5928 // If SP is not the base register we can't do much
 5929 return false;
 5930
 5931 // Stack might be involved but addressing mode doesn't handle any offset.
 5932 // Rq: AddrModeT1_[1|2|4] don't operate on SP
 5933 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
 5934 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
 5935 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
 5936 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as based register
 5937 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
 5938 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
 5939 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
 5940 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
 5941 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
 5943 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
 5944 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
 5945 return false;
 5946
 // By convention the immediate is the third operand from the end for the
 // addressing modes handled below.
 5947 unsigned NumOps = MI->getDesc().getNumOperands();
 5948 unsigned ImmIdx = NumOps - 3;
 5949
 5950 const MachineOperand &Offset = MI->getOperand(ImmIdx);
 5951 assert(Offset.isImm() && "Is not an immediate");
 5952 int64_t OffVal = Offset.getImm();
 5953
 5954 if (OffVal < 0)
 5955 // Don't override data if they are below SP.
 5956 return false;
 5957
 // NumBits = width of the immediate field; Scale = bytes per immediate unit.
 5958 unsigned NumBits = 0;
 5959 unsigned Scale = 1;
 5960
 5961 switch (AddrMode) {
 5962 case ARMII::AddrMode3:
 5963 if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
 5964 return false;
 5965 OffVal = ARM_AM::getAM3Offset(OffVal);
 5966 NumBits = 8;
 5967 break;
 5968 case ARMII::AddrMode5:
 5969 if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
 5970 return false;
 5971 OffVal = ARM_AM::getAM5Offset(OffVal);
 5972 NumBits = 8;
 5973 Scale = 4;
 5974 break;
 // NOTE(review): the case label for this arm (AM5FP16 by the accessors
 // used) is missing from this excerpt.
 5976 if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
 5977 return false;
 5978 OffVal = ARM_AM::getAM5FP16Offset(OffVal);
 5979 NumBits = 8;
 5980 Scale = 2;
 5981 break;
 5983 NumBits = 8;
 5984 break;
 5986 // FIXME: Values are already scaled in this addressing mode.
 5987 assert((Fixup & 3) == 0 && "Can't encode this offset!");
 5988 NumBits = 10;
 5989 break;
 5991 NumBits = 8;
 5992 Scale = 4;
 5993 break;
 5996 NumBits = 12;
 5997 break;
 5998 case ARMII::AddrModeT1_s: // SP-relative LD/ST
 5999 NumBits = 8;
 6000 Scale = 4;
 6001 break;
 6002 default:
 6003 llvm_unreachable("Unsupported addressing mode!");
 6004 }
 6005 // Make sure the offset is encodable for instructions that scale the
 6006 // immediate.
 6007 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
 6008 "Can't encode this offset!");
 6009 OffVal += Fixup / Scale;
 6010
 6011 unsigned Mask = (1 << NumBits) - 1;
 6012
 6013 if (OffVal <= Mask) {
 6014 if (Updt)
 6015 MI->getOperand(ImmIdx).setImm(OffVal);
 6016 return true;
 6017 }
 6018
 6019 return false;
 6020}
6021
// mergeOutliningCandidateAttributes (signature line is outside this excerpt):
// propagate BTI ("branch-target-enforcement") and PACBTI
// ("sign-return-address") function attributes from a candidate's parent
// function onto the newly created outlined function F, then defer to the
// TableGen'erated base implementation for the rest.
 6023 Function &F, std::vector<outliner::Candidate> &Candidates) const {
 6024 outliner::Candidate &C = Candidates.front();
 6025 // branch-target-enforcement is guaranteed to be consistent between all
 6026 // candidates, so we only need to look at one.
 6027 const Function &CFn = C.getMF()->getFunction();
 6028 if (CFn.hasFnAttribute("branch-target-enforcement"))
 6029 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
 6030
 6031 if (CFn.hasFnAttribute("sign-return-address"))
 6032 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
 6033
 6034 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
 6035}
6036
// Body of isFunctionSafeToOutlineFrom (signature line is outside this
// excerpt): a function may be outlined from unless the linker could
// deduplicate it (linkonce_odr, when not explicitly allowed) or it carries an
// explicit section marking.
 6038 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
 6039 const Function &F = MF.getFunction();
 6040
 6041 // Can F be deduplicated by the linker? If it can, don't outline from it.
 6042 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
 6043 return false;
 6044
 6045 // Don't outline from functions with section markings; the program could
 6046 // expect that all the code is in the named section.
 6047 // FIXME: Allow outlining from multiple functions with the same section
 6048 // marking.
 6049 if (F.hasSection())
 6050 return false;
 6051
 6052 // FIXME: Thumb1 outlining is not handled
 // NOTE(review): the condition guarding the following return (original line
 // 6053, presumably a Thumb1 check) is missing from this excerpt — verify
 // against upstream.
 6054 return false;
 6055
 6056 // It's safe to outline from MF.
 6057 return true;
 6058}
6059
// Body of isMBBSafeToOutlineFrom (signature line is outside this excerpt):
// decide whether candidates may be taken from MBB, and record per-block facts
// in Flags: whether R12/CPSR are dead throughout, whether the block contains
// a call, and whether LR is unavailable somewhere in the block.
// NOTE(review): the LiveRegUnits declaration, the accumulate loop header, and
// the `Flags |= ...` statements are missing from this excerpt (scrape
// artifact) — verify against upstream.
 6061 unsigned &Flags) const {
 6062 // Check if LR is available through all of the MBB. If it's not, then set
 6063 // a flag.
 6064 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
 6065 "Suitable Machine Function for outlining must track liveness");
 6066
 6068
 6070 LRU.accumulate(MI);
 6071
 6072 // Check if each of the unsafe registers are available...
 6073 bool R12AvailableInBlock = LRU.available(ARM::R12);
 6074 bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
 6075
 6076 // If all of these are dead (and not live out), we know we don't have to check
 6077 // them later.
 6078 if (R12AvailableInBlock && CPSRAvailableInBlock)
 6080
 6081 // Now, add the live outs to the set.
 6082 LRU.addLiveOuts(MBB);
 6083
 6084 // If any of these registers is available in the MBB, but also a live out of
 6085 // the block, then we know outlining is unsafe.
 6086 if (R12AvailableInBlock && !LRU.available(ARM::R12))
 6087 return false;
 6088 if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
 6089 return false;
 6090
 6091 // Check if there's a call inside this MachineBasicBlock. If there is, then
 6092 // set a flag.
 6093 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
 6095
 6096 // LR liveness is overestimated in return blocks.
 6097
 6098 bool LRIsAvailable =
 6099 MBB.isReturnBlock() && !MBB.back().isCall()
 6100 ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
 6101 : LRU.available(ARM::LR);
 6102 if (!LRIsAvailable)
 6104
 6105 return true;
 6106}
6107
// Body of getOutliningType (signature lines are outside this excerpt):
// classify a single instruction as Legal / LegalTerminator / Illegal /
// Invisible for the machine outliner. PIC/label-bearing instructions, MVE
// low-overhead-loop pseudos, LR/PC readers, IT-state users and CFI
// instructions are rejected; calls and SP-relative memory accesses get
// special handling.
// NOTE(review): the `return outliner::InstrType::...` statements following
// several conditions are missing from this excerpt (scrape artifact) —
// verify against upstream.
 6111 unsigned Flags) const {
 6112 MachineInstr &MI = *MIT;
 6114
 6115 // PIC instructions contain labels, outlining them would break offset
 6116 // computing.
 6117 unsigned Opc = MI.getOpcode();
 6118 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
 6119 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
 6120 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
 6121 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
 6122 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
 6123 Opc == ARM::t2MOV_ga_pcrel)
 6125
 6126 // Be conservative with ARMv8.1 MVE instructions.
 6127 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
 6128 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
 6129 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
 6130 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
 6131 Opc == ARM::t2LoopEndDec)
 6133
 6134 const MCInstrDesc &MCID = MI.getDesc();
 6135 uint64_t MIFlags = MCID.TSFlags;
 6136 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
 6138
 6139 // Is this a terminator for a basic block?
 6140 if (MI.isTerminator())
 6141 // TargetInstrInfo::getOutliningType has already filtered out anything
 6142 // that would break this, so we can allow it here.
 6144
 6145 // Don't outline if link register or program counter value are used.
 6146 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
 6148
 6149 if (MI.isCall()) {
 6150 // Get the function associated with the call. Look at each operand and find
 6151 // the one that represents the callee and get its name.
 6152 const Function *Callee = nullptr;
 6153 for (const MachineOperand &MOP : MI.operands()) {
 6154 if (MOP.isGlobal()) {
 6155 Callee = dyn_cast<Function>(MOP.getGlobal());
 6156 break;
 6157 }
 6158 }
 6159
 6160 // Don't outline calls to "mcount" like functions, in particular Linux
 6161 // kernel function tracing relies on it.
 6162 if (Callee &&
 6163 (Callee->getName() == "\01__gnu_mcount_nc" ||
 6164 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
 6166
 6167 // If we don't know anything about the callee, assume it depends on the
 6168 // stack layout of the caller. In that case, it's only legal to outline
 6169 // as a tail-call. Explicitly list the call instructions we know about so
 6170 // we don't get unexpected results with call pseudo-instructions.
 6171 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
 6172 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
 6173 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
 6174 Opc == ARM::tBLXi)
 6175 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
 6176
 6177 if (!Callee)
 6178 return UnknownCallOutlineType;
 6179
 6180 // We have a function we have information about. Check if it's something we
 6181 // can safely outline.
 6182 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
 6183
 6184 // We don't know what's going on with the callee at all. Don't touch it.
 6185 if (!CalleeMF)
 6186 return UnknownCallOutlineType;
 6187
 6188 // Check if we know anything about the callee saves on the function. If we
 6189 // don't, then don't touch it, since that implies that we haven't computed
 6190 // anything about its stack frame yet.
 6191 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
 6192 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
 6193 MFI.getNumObjects() > 0)
 6194 return UnknownCallOutlineType;
 6195
 6196 // At this point, we can say that CalleeMF ought to not pass anything on the
 6197 // stack. Therefore, we can outline it.
 6199 }
 6200
 6201 // Since calls are handled, don't touch LR or PC
 6202 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
 6204
 6205 // Does this use the stack?
 6206 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
 6207 // True if there is no chance that any outlined candidate from this range
 6208 // could require stack fixups. That is, both
 6209 // * LR is available in the range (No save/restore around call)
 6210 // * The range doesn't include calls (No save/restore in outlined frame)
 6211 // are true.
 6212 // These conditions also ensure correctness of the return address
 6213 // authentication - we insert sign and authentication instructions only if
 6214 // we save/restore LR on stack, but then this condition ensures that the
 6215 // outlined range does not modify the SP, therefore the SP value used for
 6216 // signing is the same as the one used for authentication.
 6217 // FIXME: This is very restrictive; the flags check the whole block,
 6218 // not just the bit we will try to outline.
 6219 bool MightNeedStackFixUp =
 6222
 6223 if (!MightNeedStackFixUp)
 6225
 6226 // Any modification of SP will break our code to save/restore LR.
 6227 // FIXME: We could handle some instructions which add a constant offset to
 6228 // SP, with a bit more work.
 6229 if (MI.modifiesRegister(ARM::SP, TRI))
 6231
 6232 // At this point, we have a stack instruction that we might need to fix
 6233 // up. We'll handle it if it's a load or store.
 6234 if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
 6235 false))
 6237
 6238 // We can't fix it up, so don't outline it.
 6240 }
 6241
 6242 // Be conservative with IT blocks.
 6243 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
 6244 MI.modifiesRegister(ARM::ITSTATE, TRI))
 6246
 6247 // Don't outline CFI instructions.
 6248 if (MI.isCFIInstruction())
 6250
 6252}
6253
6254void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6255 for (MachineInstr &MI : MBB) {
6256 checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
6257 }
6258}
6259
/// Emit the instructions that save LR (and, with \p Auth, the return-address
/// PAC computed into R12) to the stack before the outlined region, plus the
/// matching CFI when \p CFI is set. The store pre-decrements SP by the stack
/// alignment (at least 8 bytes).
/// NOTE(review): the `.add(predOps(...))` lines of the BuildMI chains
/// (original lines 6276/6284) are missing from this excerpt — verify against
/// upstream.
6260void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
 6261 MachineBasicBlock::iterator It, bool CFI,
 6262 bool Auth) const {
 6263 int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
 6264 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
 6265 assert(Align >= 8 && Align <= 256);
 6266 if (Auth) {
 6267 assert(Subtarget.isThumb2());
 6268 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
 6269 // sequence.
 6270 BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
 // Store {R12 (PAC), LR} as a pair with pre-decrement of SP.
 6271 BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
 6272 .addReg(ARM::R12, RegState::Kill)
 6273 .addReg(ARM::LR, RegState::Kill)
 6274 .addReg(ARM::SP)
 6275 .addImm(-Align)
 6277 .setMIFlags(MIFlags);
 6278 } else {
 6279 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
 6280 BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
 6281 .addReg(ARM::LR, RegState::Kill)
 6282 .addReg(ARM::SP)
 6283 .addImm(-Align)
 6285 .setMIFlags(MIFlags);
 6286 }
 6287
 6288 if (!CFI)
 6289 return;
 6290
 6291 // Add a CFI, saying CFA is offset by Align bytes from SP.
 6292 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
 6293 CFIBuilder.buildDefCFAOffset(Align);
 6294
 6295 // Add a CFI saying that the LR that we want to find is now higher than
 6296 // before.
 6297 int LROffset = Auth ? Align - 4 : Align;
 6298 CFIBuilder.buildOffset(ARM::LR, -LROffset);
 6299 if (Auth) {
 6300 // Add a CFI for the location of the return address PAC.
 6301 CFIBuilder.buildOffset(ARM::RA_AUTH_CODE, -Align);
 6302 }
 6303}
6304
/// Emit the instructions that reload LR (and, with \p Auth, the saved PAC
/// into R12) from the stack after the outlined region, the matching CFI when
/// \p CFI is set, and finally the t2AUT that authenticates the restored
/// return address.
/// NOTE(review): one parameter line of the signature (original line 6306,
/// the iterator) and the predicate-operand lines of the BuildMI chains are
/// missing from this excerpt — verify against upstream.
6305void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
 6307 bool CFI, bool Auth) const {
 6308 int Align = Subtarget.getStackAlignment().value();
 6309 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
 6310 if (Auth) {
 6311 assert(Subtarget.isThumb2());
 6312 // Restore return address PAC and LR.
 6313 BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
 6314 .addReg(ARM::R12, RegState::Define)
 6315 .addReg(ARM::LR, RegState::Define)
 6316 .addReg(ARM::SP, RegState::Define)
 6317 .addReg(ARM::SP)
 6318 .addImm(Align)
 6320 .setMIFlags(MIFlags);
 6321 // LR authentication is after the CFI instructions, below.
 6322 } else {
 6323 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
 6324 MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
 6325 .addReg(ARM::SP, RegState::Define)
 6326 .addReg(ARM::SP);
 6327 if (!Subtarget.isThumb())
 // ARM-mode LDR_POST_IMM takes an extra (unused) offset-register operand.
 6328 MIB.addReg(0);
 6329 MIB.addImm(Subtarget.getStackAlignment().value())
 6331 .setMIFlags(MIFlags);
 6332 }
 6333
 6334 if (CFI) {
 6335 // Now stack has moved back up and we have restored LR.
 6336 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameDestroy);
 6337 CFIBuilder.buildDefCFAOffset(0);
 6338 CFIBuilder.buildRestore(ARM::LR);
 6339 if (Auth)
 6340 CFIBuilder.buildUndefined(ARM::RA_AUTH_CODE);
 6341 }
 6342
 6343 if (Auth)
 6344 BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
 6345}
6346
// Body of buildOutlinedFrame (signature lines are outside this excerpt):
// finish the body of a freshly created outlined function. For thunks the
// trailing call is rewritten into a tail-call; if the outlined range
// contains a (non-tail) call, LR is saved/restored around the region and
// the SP-relative offsets are fixed up; finally a return is emitted unless
// the frame is a tail-call/thunk frame.
// NOTE(review): the declaration of the iterator `Et` (original line 6374)
// and the predicate operands of the final return (line 6408) are missing
// from this excerpt — verify against upstream.
 6349 const outliner::OutlinedFunction &OF) const {
 6350 // For thunk outlining, rewrite the last instruction from a call to a
 6351 // tail-call.
 6352 if (OF.FrameConstructionID == MachineOutlinerThunk) {
 6353 MachineInstr *Call = &*--MBB.instr_end();
 6354 bool isThumb = Subtarget.isThumb();
 6355 unsigned FuncOp = isThumb ? 2 : 0;
 6356 unsigned Opc = Call->getOperand(FuncOp).isReg()
 6357 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
 6358 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
 6359 : ARM::tTAILJMPdND
 6360 : ARM::TAILJMPd;
 6361 MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
 6362 .add(Call->getOperand(FuncOp));
 6363 if (isThumb && !Call->getOperand(FuncOp).isReg())
 6364 MIB.add(predOps(ARMCC::AL));
 6365 Call->eraseFromParent();
 6366 }
 6367
 6368 // Is there a call in the outlined range?
 6369 auto IsNonTailCall = [](MachineInstr &MI) {
 6370 return MI.isCall() && !MI.isReturn();
 6371 };
 6372 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
 6373 MachineBasicBlock::iterator It = MBB.begin();
 6375
 6376 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
 6377 OF.FrameConstructionID == MachineOutlinerThunk)
 6378 Et = std::prev(MBB.end());
 6379
 6380 // We have to save and restore LR, we need to add it to the liveins if it
 6381 // is not already part of the set. This is sufficient since outlined
 6382 // functions only have one block.
 6383 if (!MBB.isLiveIn(ARM::LR))
 6384 MBB.addLiveIn(ARM::LR);
 6385
 6386 // Insert a save before the outlined region
 6387 bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true);
 6388 saveLROnStack(MBB, It, true, Auth);
 6389
 6390 // Fix up the instructions in the range, since we're going to modify the
 6391 // stack.
 6392 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
 6393 "Can only fix up stack references once");
 6394 fixupPostOutline(MBB);
 6395
 6396 // Insert a restore before the terminator for the function. Restore LR.
 6397 restoreLRFromStack(MBB, Et, true, Auth);
 6398 }
 6399
 6400 // If this is a tail call outlined function, then there's already a return.
 6401 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
 6402 OF.FrameConstructionID == MachineOutlinerThunk)
 6403 return;
 6404
 6405 // Here we have to insert the return ourselves. Get the correct opcode from
 6406 // current feature set.
 6407 BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
 6409
 6410 // Did we have to modify the stack by saving the link register?
 6411 if (OF.FrameConstructionID != MachineOutlinerDefault &&
 6412 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
 6413 return;
 6414
 6415 // We modified the stack.
 6416 // Walk over the basic block and fix up all the stack accesses.
 6417 fixupPostOutline(MBB);
 6418}
6419
// Body of insertOutlinedCall (signature lines are outside this excerpt):
// insert, at iterator It in MBB, the call (or tail-call branch) to the
// outlined function, choosing the save/restore strategy recorded in the
// candidate's CallConstructionID: tail-call branch, plain call (NoLRSave /
// Thunk), save LR in a spare register (RegSave), or save/restore LR on the
// stack (default), with CFI emitted when LR was not already spilled.
// NOTE(review): the CFIInstBuilder statements paired with the
// `if (!AFI.isLRSpilled())` guards (original lines 6465/6470) are missing
// from this excerpt — verify against upstream.
 6425 unsigned Opc;
 6426 bool isThumb = Subtarget.isThumb();
 6427
 6428 // Are we tail calling?
 6429 if (C.CallConstructionID == MachineOutlinerTailCall) {
 6430 // If yes, then we can just branch to the label.
 6431 Opc = isThumb
 6432 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
 6433 : ARM::TAILJMPd;
 6434 MIB = BuildMI(MF, DebugLoc(), get(Opc))
 6435 .addGlobalAddress(M.getNamedValue(MF.getName()));
 6436 if (isThumb)
 6437 MIB.add(predOps(ARMCC::AL));
 6438 It = MBB.insert(It, MIB);
 6439 return It;
 6440 }
 6441
 6442 // Create the call instruction.
 6443 Opc = isThumb ? ARM::tBL : ARM::BL;
 6444 MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
 6445 if (isThumb)
 6446 CallMIB.add(predOps(ARMCC::AL));
 6447 CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
 6448
 6449 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
 6450 C.CallConstructionID == MachineOutlinerThunk) {
 6451 // No, so just insert the call.
 6452 It = MBB.insert(It, CallMIB);
 6453 return It;
 6454 }
 6455
 6456 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
 6457 // Can we save to a register?
 6458 if (C.CallConstructionID == MachineOutlinerRegSave) {
 6459 Register Reg = findRegisterToSaveLRTo(C);
 6460 assert(Reg != 0 && "No callee-saved register available?");
 6461
 6462 // Save and restore LR from that register.
 6463 copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
 6464 if (!AFI.isLRSpilled())
 6466 .buildRegister(ARM::LR, Reg);
 6467 CallPt = MBB.insert(It, CallMIB);
 6468 copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
 6469 if (!AFI.isLRSpilled())
 6471 It--;
 6472 return CallPt;
 6473 }
 6474 // We have the default case. Save and restore from SP.
 6475 if (!MBB.isLiveIn(ARM::LR))
 6476 MBB.addLiveIn(ARM::LR);
 // Only sign the return address when LR is not already covered by the
 // caller's own spill (and signing is requested for this function).
 6477 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
 6478 saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
 6479 CallPt = MBB.insert(It, CallMIB);
 6480 restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
 6481 It--;
 6482 return CallPt;
 6483}
6484
// Body of shouldOutlineFromFunctionByDefault (signature line is outside this
// excerpt): outline by default only on M-class targets and only from
// functions marked minsize.
 6486 MachineFunction &MF) const {
 6487 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
 6488}
6489
/// Whether MI may be rematerialized instead of spilled/reloaded. Unpredicated
/// VCTPs are always considered rematerializable (see comment below).
/// NOTE(review): the second operand of the `||` (original line 6497,
/// presumably the base-class check) is missing from this excerpt — verify
/// against upstream.
6490bool ARMBaseInstrInfo::isReMaterializableImpl(
 6491 const MachineInstr &MI) const {
 6492 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
 6493 // the tail predication conversion. This means that the element count
 6494 // register has to be live for longer, but that has to be better than
 6495 // spill/restore and VPT predication.
 6496 return (isVCTP(&MI) && !isPredicated(MI)) ||
 6498}
6499
// Body of the ARM-mode BLX opcode selector (signature line is outside this
// excerpt): pick the `_noip` BLX variant when SLS-BLR hardening is enabled.
 6501 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
 6502 : ARM::BLX;
 6503}
6504
// Body of the Thumb register-BLX opcode selector (signature line is outside
// this excerpt): pick the `_noip` tBLXr variant when SLS-BLR hardening is
// enabled.
 6506 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
 6507 : ARM::tBLXr;
 6508}
6509
// Body of the predicated-BLX opcode selector (signature line is outside this
// excerpt): pick the `_noip` BLX_pred variant when SLS-BLR hardening is
// enabled.
 6511 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
 6512 : ARM::BLX_pred;
 6513}
6514
6515namespace {
// Software-pipelining support for ARM loops. Handles two loop shapes
// (see the member comment below): a t2Bcc-terminated loop whose CPSR setter
// acts as the loop count, and a t2LoopEnd/t2LoopDec low-overhead loop.
// Also vetoes schedules whose cross-iteration register pressure is too high.
6516class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
 6517 MachineInstr *EndLoop, *LoopCount;
 6518 MachineFunction *MF;
 6519 const TargetInstrInfo *TII;
 6520
 6521 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
 6522 // [LAST_IS_USE] : last reference to register in schedule is a use
 6523 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
 6524 static int constexpr MAX_STAGES = 30;
 6525 static int constexpr LAST_IS_USE = MAX_STAGES;
 6526 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
 6527 typedef std::bitset<MAX_STAGES + 2> IterNeed;
 6528 typedef std::map<Register, IterNeed> IterNeeds;
 6529
 6530 void bumpCrossIterationPressure(RegPressureTracker &RPT,
 6531 const IterNeeds &CIN);
 6532 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
 6533
 6534 // Meanings of the various stuff with loop types:
 6535 // t2Bcc:
 6536 // EndLoop = branch at end of original BB that will become a kernel
 6537 // LoopCount = CC setter live into branch
 6538 // t2LoopEnd:
 6539 // EndLoop = branch at end of original BB
 6540 // LoopCount = t2LoopDec
 6541public:
 6542 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
 6543 : EndLoop(EndLoop), LoopCount(LoopCount),
 6544 MF(EndLoop->getParent()->getParent()),
 6545 TII(MF->getSubtarget().getInstrInfo()) {}
 6546
 6547 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
 6548 // Only ignore the terminator.
 6549 return MI == EndLoop || MI == LoopCount;
 6550 }
 6551
 6552 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
 6553 if (tooMuchRegisterPressure(SSD, SMS))
 6554 return false;
 6555
 6556 return true;
 6557 }
 6558
 // Push the branch condition for "trip count > TC" into Cond; returns empty
 // to indicate the caller must materialize the comparison.
 // NOTE(review): lines 6567 (inside the self-branch case), 6582 and 6584 of
 // this method are missing from this excerpt — verify against upstream.
 6559 std::optional<bool> createTripCountGreaterCondition(
 6560 int TC, MachineBasicBlock &MBB,
 6561 SmallVectorImpl<MachineOperand> &Cond) override {
 6562
 6563 if (isCondBranchOpcode(EndLoop->getOpcode())) {
 6564 Cond.push_back(EndLoop->getOperand(1));
 6565 Cond.push_back(EndLoop->getOperand(2));
 6566 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
 6568 }
 6569 return {};
 6570 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
 6571 // General case just lets the unrolled t2LoopDec do the subtraction and
 6572 // therefore just needs to check if zero has been reached.
 6573 MachineInstr *LoopDec = nullptr;
 6574 for (auto &I : MBB.instrs())
 6575 if (I.getOpcode() == ARM::t2LoopDec)
 6576 LoopDec = &I;
 6577 assert(LoopDec && "Unable to find copied LoopDec");
 6578 // Check if we're done with the loop.
 6579 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
 6580 .addReg(LoopDec->getOperand(0).getReg())
 6581 .addImm(0)
 6583 .addReg(ARM::NoRegister);
 6585 Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
 6586 return {};
 6587 } else
 6588 llvm_unreachable("Unknown EndLoop");
 6589 }
 6590
 6591 void setPreheader(MachineBasicBlock *NewPreheader) override {}
 6592
 6593 void adjustTripCount(int TripCountAdjust) override {}
 6594};
6595
// Temporarily add the cross-iteration register requirements in CIN to the
// pressure tracker: each register contributes (needed-iterations count) units,
// minus two when the normal pressure algorithm already sees it as live.
// NOTE(review): the pressure increase/decrease statements inside the two
// inner loops (original lines 6602-6603 and 6609-6610) are missing from this
// excerpt — verify against upstream.
6596void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
 6597 const IterNeeds &CIN) {
 6598 // Increase pressure by the amounts in CrossIterationNeeds
 6599 for (const auto &N : CIN) {
 6600 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
 6601 for (int I = 0; I < Cnt; ++I)
 6604 }
 6605 // Decrease pressure by the amounts in CrossIterationNeeds
 6606 for (const auto &N : CIN) {
 6607 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
 6608 for (int I = 0; I < Cnt; ++I)
 6611 }
 6612}
6613
// Estimate the register pressure of the proposed software-pipelined schedule,
// including loop-carried (cross-iteration) values, and return true if any
// pressure set would exceed its limit. The schedule is replayed in reverse
// with a RegPressureTracker while cross-iteration needs are added on top.
6614bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
 6615 SMSchedule &SMS) {
 6616 IterNeeds CrossIterationNeeds;
 6617
 6618 // Determine which values will be loop-carried after the schedule is
 6619 // applied
 6620
 6621 for (auto &SU : SSD.SUnits) {
 6622 const MachineInstr *MI = SU.getInstr();
 6623 int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
 6624 for (auto &S : SU.Succs)
 // A PHI's anti-dependence marks a value carried into the next iteration.
 6625 if (MI->isPHI() && S.getKind() == SDep::Anti) {
 6626 Register Reg = S.getReg();
 6627 if (Reg.isVirtual())
 6628 CrossIterationNeeds[Reg.id()].set(0);
 6629 } else if (S.isAssignedRegDep()) {
 6630 int OStg = SMS.stageScheduled(S.getSUnit());
 6631 if (OStg >= 0 && OStg != Stg) {
 6632 Register Reg = S.getReg();
 6633 if (Reg.isVirtual())
 // Mark every stage the value must stay live across.
 6634 CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1);
 6635 }
 6636 }
 6637 }
 6638
 6639 // Determine more-or-less what the proposed schedule (reversed) is going to
 6640 // be; it might not be quite the same because the within-cycle ordering
 6641 // created by SMSchedule depends upon changes to help with address offsets and
 6642 // the like.
 6643 std::vector<SUnit *> ProposedSchedule;
 6644 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
 6645 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
 6646 ++Stage) {
 6647 std::deque<SUnit *> Instrs =
 6648 SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
 6649 std::sort(Instrs.begin(), Instrs.end(),
 6650 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
 6651 llvm::append_range(ProposedSchedule, Instrs);
 6652 }
 6653
 6654 // Learn whether the last use/def of each cross-iteration register is a use or
 6655 // def. If it is a def, RegisterPressure will implicitly increase max pressure
 6656 // and we do not have to add the pressure.
 6657 for (auto *SU : ProposedSchedule)
 6658 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
 6659 ++OperI) {
 6660 auto MO = *OperI;
 6661 if (!MO.isReg() || !MO.getReg())
 6662 continue;
 6663 Register Reg = MO.getReg();
 6664 auto CIter = CrossIterationNeeds.find(Reg.id());
 6665 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
 6666 CIter->second[SEEN_AS_LIVE])
 6667 continue;
 6668 if (MO.isDef() && !MO.isDead())
 6669 CIter->second.set(SEEN_AS_LIVE);
 6670 else if (MO.isUse())
 6671 CIter->second.set(LAST_IS_USE);
 6672 }
 // LAST_IS_USE was only needed to compute SEEN_AS_LIVE; clear it now.
 6673 for (auto &CI : CrossIterationNeeds)
 6674 CI.second.reset(LAST_IS_USE);
 6676 RegionPressure RecRegPressure;
 6677 RegPressureTracker RPTracker(RecRegPressure);
 6678 RegisterClassInfo RegClassInfo;
 6679 RegClassInfo.runOnMachineFunction(*MF);
 6680 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
 6681 EndLoop->getParent()->end(), false, false);
 6682
 6683 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
 6684
 // Replay the proposed schedule bottom-up, maintaining cross-iteration needs
 // as defs retire them and predecessors' stage distances introduce them.
 6685 for (auto *SU : ProposedSchedule) {
 6686 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
 6687 RPTracker.setPos(std::next(CurInstI));
 6688 RPTracker.recede();
 6689
 6690 // Track what cross-iteration registers would be seen as live
 6691 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
 6692 auto MO = *OperI;
 6693 if (!MO.isReg() || !MO.getReg())
 6694 continue;
 6695 Register Reg = MO.getReg();
 6696 if (MO.isDef() && !MO.isDead()) {
 6697 auto CIter = CrossIterationNeeds.find(Reg.id());
 6698 if (CIter != CrossIterationNeeds.end()) {
 6699 CIter->second.reset(0);
 6700 CIter->second.reset(SEEN_AS_LIVE);
 6701 }
 6702 }
 6703 }
 6704 for (auto &S : SU->Preds) {
 6705 auto Stg = SMS.stageScheduled(SU);
 6706 if (S.isAssignedRegDep()) {
 6707 Register Reg = S.getReg();
 6708 auto CIter = CrossIterationNeeds.find(Reg.id());
 6709 if (CIter != CrossIterationNeeds.end()) {
 6710 auto Stg2 = SMS.stageScheduled(S.getSUnit());
 6711 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
 6712 if (Stg - Stg2 < MAX_STAGES)
 6713 CIter->second.set(Stg - Stg2);
 6714 CIter->second.set(SEEN_AS_LIVE);
 6715 }
 6716 }
 6717 }
 6718
 6719 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
 6720 }
 6721
 6722 auto &P = RPTracker.getPressure().MaxSetPressure;
 6723 for (unsigned I = 0, E = P.size(); I < E; ++I) {
 6724 // Exclude some Neon register classes.
 6725 if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 ||
 6726 I == ARM::DTriple_with_qsub_0_in_QPR)
 6727 continue;
 6728
 6729 if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
 6730 return true;
 6731 }
 6732 }
 6733 return false;
 6734}
6735
6736} // namespace
6737
// analyzeLoopForPipelining: recognize the two loop shapes the ARM pipeliner
// supports and build an ARMPipelinerLoopInfo for them, or return nullptr.
// NOTE(review): the function-name line of the signature and the
// initialization of the iterator `I` (original lines 6739-6740, presumably
// LoopBB->getFirstTerminator()) are missing from this excerpt — verify
// against upstream.
6738std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
 6741 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
 6742 if (Preheader == LoopBB)
 6743 Preheader = *std::next(LoopBB->pred_begin());
 6744
 6745 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
 6746 // If the branch is a Bcc, then the CPSR should be set somewhere within the
 6747 // block. We need to determine the reaching definition of CPSR so that
 6748 // it can be marked as non-pipelineable, allowing the pipeliner to force
 6749 // it into stage 0 or give up if it cannot or will not do so.
 6750 MachineInstr *CCSetter = nullptr;
 6751 for (auto &L : LoopBB->instrs()) {
 6752 if (L.isCall())
 6753 return nullptr;
 6754 if (isCPSRDefined(L))
 6755 CCSetter = &L;
 6756 }
 6757 if (CCSetter)
 6758 return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
 6759 else
 6760 return nullptr; // Unable to find the CC setter, so unable to guarantee
 6761 // that pipeline will work
 6762 }
 6763
 6764 // Recognize:
 6765 // preheader:
 6766 // %1 = t2DoLoopStart %0
 6767 // loop:
 6768 // %2 = phi %1, <not loop>, %..., %loop
 6769 // %3 = t2LoopDec %2, <imm>
 6770 // t2LoopEnd %3, %loop
 6771
 6772 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
 6773 for (auto &L : LoopBB->instrs())
 6774 if (L.isCall())
 6775 return nullptr;
 6776 else if (isVCTP(&L))
 6777 return nullptr;
 6778 Register LoopDecResult = I->getOperand(0).getReg();
 6779 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
 6780 MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
 6781 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
 6782 return nullptr;
 6783 MachineInstr *LoopStart = nullptr;
 6784 for (auto &J : Preheader->instrs())
 6785 if (J.getOpcode() == ARM::t2DoLoopStart)
 6786 LoopStart = &J;
 6787 if (!LoopStart)
 6788 return nullptr;
 6789 return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
 6790 }
 6791 return nullptr;
 6792}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isThumb(const MCSubtargetInfo &STI)
static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, MachineInstr &MI, MCRegister DReg, unsigned Lane, MCRegister &ImplicitSReg)
getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, set ImplicitSReg to a register n...
static const MachineInstr * getBundledUseMI(const TargetRegisterInfo *TRI, const MachineInstr &MI, unsigned Reg, unsigned &UseIdx, unsigned &Dist)
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI)
Create a copy of a const pool value.
static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg, int CmpMask, bool CommonUse)
isSuitableForMask - Identify a suitable 'and' instruction that operates on the given source register ...
static int adjustDefLatency(const ARMSubtarget &Subtarget, const MachineInstr &DefMI, const MCInstrDesc &DefMCID, unsigned DefAlign)
Return the number of cycles to add to (or subtract from) the static itinerary based on the def opcode...
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, const MachineInstr &MI)
static MCRegister getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, unsigned SReg, unsigned &Lane)
static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[]
static bool isEligibleForITBlock(const MachineInstr *MI)
static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC)
getCmpToAddCondition - assume the flags are set by CMP(a,b), return the condition code if we modify t...
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1)
static bool isLRAvailable(const TargetRegisterInfo &TRI, MachineBasicBlock::reverse_iterator I, MachineBasicBlock::reverse_iterator E)
static const ARM_MLxEntry ARM_MLxTable[]
static bool isRedundantFlagInstr(const MachineInstr *CmpI, Register SrcReg, Register SrcReg2, int64_t ImmValue, const MachineInstr *OI, bool &IsThumb1)
isRedundantFlagInstr - check whether the first instruction, whose only purpose is to update flags,...
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, unsigned NumRegs)
static const MachineInstr * getBundledDefMI(const TargetRegisterInfo *TRI, const MachineInstr *MI, unsigned Reg, unsigned &DefIdx, unsigned &Dist)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DXIL Forward Handle Accesses
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define P(N)
PowerPC TLS Dynamic Call Fixup
TargetInstrInfo::RegSubRegPairAndIdx RegSubRegPairAndIdx
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the SmallSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
static bool isCPSRDefined(const MachineInstr &MI)
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction to set the zero flag so that we can remove a "comparis...
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
foldImmediate - 'Reg' is known to be defined by a move immediate instruction, try to fold the immedia...
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MCRegister DestReg, bool KillSrc, const ARMSubtarget &Subtarget) const
unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getPartialRegUpdateClearance(const MachineInstr &, unsigned, const TargetRegisterInfo *) const override
unsigned getNumLDMAddresses(const MachineInstr &MI) const
Get the number of addresses by LDM or VLDM or zero for unknown.
MachineInstr * optimizeSelect(MachineInstr &MI, SmallPtrSetImpl< MachineInstr * > &SeenMIs, bool) const override
bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI) const override
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Returns the size of the specified MachineInstr.
void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MCRegister SrcReg, bool KillSrc, const ARMSubtarget &Subtarget) const
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
const MachineInstrBuilder & AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, RegState State) const
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
ARM supports the MachineOutliner.
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
Enable outlining by default at -Oz.
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const override
bool isPredicated(const MachineInstr &MI) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, unsigned LoadImmOpc, unsigned LoadOpc) const
bool isPredicable(const MachineInstr &MI) const override
isPredicable - Return true if the specified instruction can be predicated.
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
Specialization of TargetInstrInfo::describeLoadedValue, used to enhance debug entry value description...
std::optional< std::unique_ptr< outliner::OutlinedFunction > > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs, unsigned MinRepeats) const override
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, unsigned NumInsts) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
const ARMBaseRegisterInfo & getRegisterInfo() const
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to determine if two loads are lo...
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
bool getRegSequenceLikeInputs(const MachineInstr &MI, unsigned DefIdx, SmallVectorImpl< RegSubRegPairAndIdx > &InputRegs) const override
Build the equivalent inputs of a REG_SEQUENCE for the given MI and DefIdx.
unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override
bool getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const override
Build the equivalent inputs of a INSERT_SUBREG for the given MI and DefIdx.
bool expandPostRAPseudo(MachineInstr &MI) const override
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to determine (in conjunction w...
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
VFP/NEON execution domains.
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isFpMLxInstruction(unsigned Opcode) const
isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS instruction.
bool isSwiftFastImmShift(const MachineInstr *MI) const
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const override
ARMBaseInstrInfo(const ARMSubtarget &STI, const ARMBaseRegisterInfo &TRI)
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void breakPartialRegDependency(MachineInstr &, unsigned, const TargetRegisterInfo *TRI) const override
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
const ARMSubtarget & getSubtarget() const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPairAndIdx &InputReg) const override
Build the equivalent inputs of a EXTRACT_SUBREG for the given MI and DefIdx.
bool shouldSink(const MachineInstr &MI) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMCP::ARMCPModifier getModifier() const
virtual bool hasSameValue(ARMConstantPoolValue *ACPV)
hasSameValue - Return true if this ARM constpool value can share the same constantpool entry as anoth...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
bool isCortexA7() const
bool isSwift() const
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb1Only() const
bool isThumb2() const
bool isLikeA9() const
Align getStackAlignment() const
getStackAlignment - Returns the minimum alignment known to hold of the stack frame on entry to the fu...
bool enableMachinePipeliner() const override
Returns true if machine pipeliner should be enabled.
bool hasMinSize() const
bool isCortexA8() const
@ DoubleIssueCheckUnalignedAccess
Can load/store 2 registers/cycle, but needs an extra cycle if the access is not 64-bit aligned.
@ SingleIssue
Can load/store 1 register/cycle.
@ DoubleIssue
Can load/store 2 registers/cycle.
@ SingleIssuePlusExtras
Can load/store 1 register/cycle, but needs an extra cycle for address computation and potentially als...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type size() const
size - Returns the number of bits in this bitvector.
Definition BitVector.h:178
LLVM_ABI uint64_t scale(uint64_t Num) const
Scale a large integer.
BranchProbability getCompl() const
Helper class for creating CFI instructions and inserting them into MIR.
void buildRegister(MCRegister Reg1, MCRegister Reg2) const
void buildRestore(MCRegister Reg) const
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
A debug info location.
Definition DebugLoc.h:123
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:75
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
bool hasDLLImportStorageClass() const
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
Itinerary data supplied by a subtarget to be used by a target.
int getNumMicroOps(unsigned ItinClassIndx) const
Return the number of micro-ops that the given class decodes to.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
unsigned getStageLatency(unsigned ItinClassIndx) const
Return the total stage latency of the given class.
std::optional< unsigned > getOperandLatency(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Compute and return the use operand latency of a given itinerary class and operand index if the value ...
bool hasPipelineForwarding(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Return true if there is a pipeline forwarding between instructions of itinerary classes DefClass and ...
bool isEmpty() const
Returns true if there are no itineraries.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
Describe properties that are true of each instruction in the target description file.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayLoad() const
Return true if this instruction could possibly read memory.
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
bool isCall() const
Return true if the instruction is a call.
unsigned getOpcode() const
Return the opcode number for this descriptor.
LLVM_ABI bool hasImplicitDefOfPhysReg(MCRegister Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool isValid() const
isValid - Returns true until all the operands have been visited.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineInstrBundleIterator< MachineInstr > iterator
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
@ LQR_Dead
Register is known to be fully dead.
@ LQR_Live
Register is known to be (at least partially) live.
@ LQR_Unknown
Register liveness not decidable from local neighborhood.
This class is a data container for one entry in a MachineConstantPool.
union llvm::MachineConstantPoolEntry::@004270020304201266316354007027341142157160323045 Val
The constant itself.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
MachineConstantPoolValue * MachineCPVal
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
ArrayRef< MachineMemOperand * >::iterator mmo_iterator
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isCall(QueryType Type=AnyInBundle) const
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI int findFirstPredOperandIdx() const
Find the index of the first operand in the operand list that is used to represent the predicate.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool isRegSequence() const
bool isInsertSubreg() const
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
bool hasOptionalDef(QueryType Type=IgnoreBundle) const
Set if this instruction has an optional definition, e.g.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI void clearKillInfo()
Clears kill flags on all operands.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static use_instr_iterator use_instr_end()
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void AddHazardRecognizer(std::unique_ptr< ScheduleHazardRecognizer > &&)
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void increaseRegPressure(VirtRegOrUnit VRegOrUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
LLVM_ABI void decreaseRegPressure(VirtRegOrUnit VRegOrUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition Register.h:60
constexpr unsigned id() const
Definition Register.h:100
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
This class represents the scheduled code.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Scheduling unit. This is a node in the scheduling DAG.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
std::vector< SUnit > SUnits
The scheduling units.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
const InstrItineraryData * getInstrItineraries() const
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Wrapper class representing a virtual register or register unit.
Definition Register.h:181
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
static CondCodes getOppositeCondition(CondCodes CC)
Definition ARMBaseInfo.h:48
ARMII - This namespace holds all of the target specific flags that instruction info tracks.
@ ThumbArithFlagSetting
@ MO_OPTION_MASK
MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects just that part of the flag set.
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
AddrMode
ARM Addressing Modes.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned char getAM5FP16Offset(unsigned AM5Opc)
unsigned getSORegOffset(unsigned Op)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
ShiftOpc getAM2ShiftOpc(unsigned AM2Opc)
unsigned getAM2Offset(unsigned AM2Opc)
unsigned getSOImmValRotate(unsigned Imm)
getSOImmValRotate - Try to handle Imm with an immediate shifter operand, computing the rotate amount ...
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
ShiftOpc getSORegShOp(unsigned Op)
AddrOpc getAM5Op(unsigned AM5Opc)
bool isSOImmTwoPartValNeg(unsigned V)
isSOImmTwoPartValNeg - Return true if the specified value can be obtained by two SOImmVal,...
unsigned getSOImmTwoPartSecond(unsigned V)
getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, return the second chunk of ...
bool isSOImmTwoPartVal(unsigned V)
isSOImmTwoPartVal - Return true if the specified value can be obtained by or'ing together two SOImmVa...
AddrOpc getAM5FP16Op(unsigned AM5Opc)
unsigned getT2SOImmTwoPartSecond(unsigned Imm)
unsigned getT2SOImmTwoPartFirst(unsigned Imm)
bool isT2SOImmTwoPartVal(unsigned Imm)
unsigned char getAM5Offset(unsigned AM5Opc)
unsigned getSOImmTwoPartFirst(unsigned V)
getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal, return the first chunk of it...
AddrOpc getAM2Op(unsigned AM2Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
Define some predicates that are used for node matching.
Definition ARMEHABI.h:25
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
InstrType
Represents how an instruction should be mapped by the outliner.
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
constexpr T rotr(T V, int R)
Definition bit.h:399
static bool isIndirectCall(const MachineInstr &MI)
MachineInstr * findCMPToFoldIntoCBZ(MachineInstr *Br, const TargetRegisterInfo *TRI)
Search backwards from a tBcc to find a tCMPi8 against 0, meaning we can convert them to a tCBZ or tCB...
static bool isCondBranchOpcode(int Opc)
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
static bool isPushOpcode(int Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond)
static bool isVCTP(const MachineInstr *MI)
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
bool IsCPSRDead< MachineInstr >(const MachineInstr *MI)
constexpr RegState getKillRegState(bool B)
unsigned getBLXpredOpcode(const MachineFunction &MF)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
static bool isIndirectBranchOpcode(int Opc)
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI)
Return true if Reg is defined between From and To.
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Op::Description Desc
static bool isSEHInstruction(const MachineInstr &MI)
static bool isCalleeSavedRegister(MCRegister Reg, const MCPhysReg *CSRegs)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
static bool isJumpTableBranchOpcode(int Opc)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
static bool isPopOpcode(int Opc)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond, unsigned Inactive)
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Register FrameReg, int &Offset, const ARMBaseInstrInfo &TII)
rewriteARMFrameIndex / rewriteT2FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
static bool isIndirectControlFlowNotComingBack(const MachineInstr &MI)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
unsigned getMatchingCondBranchOpcode(unsigned Opc)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
auto partition(R &&Range, UnaryPredicate P)
Provide wrappers to std::partition which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:2033
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
static const char * ARMCondCodeToString(ARMCC::CondCodes CC)
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned gettBLXrOpcode(const MachineFunction &MF)
static bool isSpeculationBarrierEndBBOpcode(int Opc)
unsigned getBLXOpcode(const MachineFunction &MF)
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
bool isV8EligibleForIT(const InstrType *Instr)
Definition ARMFeatures.h:24
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a des...
constexpr RegState getUndefRegState(bool B)
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
#define N
ARM_MLxEntry - Record information about MLA / MLS instructions.
Map pseudo instructions that imply an 'S' bit onto real opcodes.
OutlinerCosts(const ARMSubtarget &target)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr LaneBitmask getAll()
Definition LaneBitmask.h:82
static constexpr LaneBitmask getNone()
Definition LaneBitmask.h:81
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
An individual sequence of instructions to be replaced with a call to an outlined function.
The information necessary to create an outlined function for some class of candidate.