ARMBaseInstrInfo.cpp
1//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the Base ARM implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMBaseInstrInfo.h"
14#include "ARMBaseRegisterInfo.h"
16#include "ARMFeatures.h"
17#include "ARMHazardRecognizer.h"
19#include "ARMSubtarget.h"
22#include "MVETailPredUtils.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallSet.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/Module.h"
53#include "llvm/MC/MCAsmInfo.h"
54#include "llvm/MC/MCInstrDesc.h"
59#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <iterator>
67#include <new>
68#include <utility>
69#include <vector>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "arm-instrinfo"
74
75#define GET_INSTRINFO_CTOR_DTOR
76#include "ARMGenInstrInfo.inc"
77
78/// ARM_MLxEntry - Record information about MLA / MLS instructions.
79struct ARM_MLxEntry {
80 uint16_t MLxOpc; // MLA / MLS opcode
81 uint16_t MulOpc; // Expanded multiplication opcode
82 uint16_t AddSubOpc; // Expanded add / sub opcode
83 bool NegAcc; // True if the acc is negated before the add / sub.
84 bool HasLane; // True if instruction has an extra "lane" operand.
85};
86
87static const ARM_MLxEntry ARM_MLxTable[] = {
88 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
89 // fp scalar ops
90 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
91 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
92 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
93 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
94 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
95 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
96 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
97 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
98
99 // fp SIMD ops
100 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
101 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
102 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
103 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
104 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
105 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
106 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
107 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
108};
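// Illustrative reading of the table above (an interpretation based on the
// field comments, not text from the source): the first entry records that a
// scalar VMLAS multiply-accumulate can be expanded into a VMULS feeding a
// VADDS; entries with NegAcc set route the result through a VSUB because the
// accumulator is negated first, and HasLane marks the by-lane SIMD forms
// that carry an extra "lane" operand.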
109
112 : ARMGenInstrInfo(STI, TRI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
113 Subtarget(STI) {
114 for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
115 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
116 llvm_unreachable("Duplicated entries?");
117 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
118 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
119 }
120}
121
122// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
123// currently defaults to no prepass hazard recognizer.
124ScheduleHazardRecognizer *
125ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
126 const ScheduleDAG *DAG) const {
127 if (usePreRAHazardRecognizer()) {
128 const InstrItineraryData *II =
129 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
130 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
131 }
132 return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
133}
134
135// Called during:
136// - pre-RA scheduling
137// - post-RA scheduling when FeatureUseMISched is set
138ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
139 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
140 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
141
142 // We would like to restrict this hazard recognizer to only
143 // post-RA scheduling; we can tell that we're post-RA because we don't
144 // track VRegLiveness.
145 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
146 // banks banked on bit 2. Assume that TCMs are in use.
147 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
148 MHR->AddHazardRecognizer(
149 std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
150
151 // Not inserting ARMHazardRecognizerFPMLx because that would change
152 // legacy behavior
153
154 auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
155 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
156 return MHR;
157}
158
159// Called during post-RA scheduling when FeatureUseMISched is not set
160ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetPostRAHazardRecognizer(
161 const InstrItineraryData *II,
162 const ScheduleDAG *DAG) const {
163 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
164
165 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
166 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
167
168 auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
169 if (BHR)
170 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
171 return MHR;
172}
173
174// Branch analysis.
175// Cond vector output format:
176// 0 elements indicates an unconditional branch
177// 2 elements indicates a conditional branch; the elements are
178// the condition to check and the CPSR.
179// 3 elements indicates a hardware loop end; the elements
180// are the opcode, the operand value to test, and a dummy
181// operand used to pad out to 3 operands.
182bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
183 MachineBasicBlock *&TBB,
184 MachineBasicBlock *&FBB,
185 SmallVectorImpl<MachineOperand> &Cond,
186 bool AllowModify) const {
187 TBB = nullptr;
188 FBB = nullptr;
189
190 MachineBasicBlock::instr_iterator I = MBB.instr_end();
191 if (I == MBB.instr_begin())
192 return false; // Empty blocks are easy.
193 --I;
194
195 // Walk backwards from the end of the basic block until the branch is
196 // analyzed or we give up.
197 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
198 // Flag to be raised on unanalyzable instructions. This is useful in cases
199 // where we want to clean up on the end of the basic block before we bail
200 // out.
201 bool CantAnalyze = false;
202
203 // Skip over DEBUG values, predicated nonterminators and speculation
204 // barrier terminators.
205 while (I->isDebugInstr() || !I->isTerminator() ||
206 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
207 I->getOpcode() == ARM::t2DoLoopStartTP){
208 if (I == MBB.instr_begin())
209 return false;
210 --I;
211 }
212
213 if (isIndirectBranchOpcode(I->getOpcode()) ||
214 isJumpTableBranchOpcode(I->getOpcode())) {
215 // Indirect branches and jump tables can't be analyzed, but we still want
216 // to clean up any instructions at the tail of the basic block.
217 CantAnalyze = true;
218 } else if (isUncondBranchOpcode(I->getOpcode())) {
219 TBB = I->getOperand(0).getMBB();
220 } else if (isCondBranchOpcode(I->getOpcode())) {
221 // Bail out if we encounter multiple conditional branches.
222 if (!Cond.empty())
223 return true;
224
225 assert(!FBB && "FBB should have been null.");
226 FBB = TBB;
227 TBB = I->getOperand(0).getMBB();
228 Cond.push_back(I->getOperand(1));
229 Cond.push_back(I->getOperand(2));
230 } else if (I->isReturn()) {
231 // Returns can't be analyzed, but we should run cleanup.
232 CantAnalyze = true;
233 } else if (I->getOpcode() == ARM::t2LoopEnd &&
234 MBB.getParent()
235 ->getSubtarget<ARMSubtarget>()
236 .enableMachinePipeliner()) {
237 if (!Cond.empty())
238 return true;
239 FBB = TBB;
240 TBB = I->getOperand(1).getMBB();
241 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
242 Cond.push_back(I->getOperand(0));
243 Cond.push_back(MachineOperand::CreateImm(0));
244 } else {
245 // We encountered an unrecognized terminator. Bail out immediately.
246 return true;
247 }
248
249 // Cleanup code - to be run for unpredicated unconditional branches and
250 // returns.
251 if (!isPredicated(*I) &&
252 (isUncondBranchOpcode(I->getOpcode()) ||
253 isIndirectBranchOpcode(I->getOpcode()) ||
254 isJumpTableBranchOpcode(I->getOpcode()) ||
255 I->isReturn())) {
256 // Forget any previous conditional branch information - it no longer applies.
257 Cond.clear();
258 FBB = nullptr;
259
260 // If we can modify the function, delete everything below this
261 // unconditional branch.
262 if (AllowModify) {
263 MachineBasicBlock::iterator DI = std::next(I);
264 while (DI != MBB.instr_end()) {
265 MachineInstr &InstToDelete = *DI;
266 ++DI;
267 // Speculation barriers must not be deleted.
268 if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
269 continue;
270 InstToDelete.eraseFromParent();
271 }
272 }
273 }
274
275 if (CantAnalyze) {
276 // We may not be able to analyze the block, but we could still have
277 // an unconditional branch as the last instruction in the block, which
278 // just branches to the layout successor. If this is the case, then just
279 // remove it if we're allowed to make modifications.
280 if (AllowModify && !isPredicated(MBB.back()) &&
281 isUncondBranchOpcode(MBB.back().getOpcode()) &&
282 TBB && MBB.isLayoutSuccessor(TBB))
283 removeBranch(MBB);
284 return true;
285 }
286
287 if (I == MBB.instr_begin())
288 return false;
289
290 --I;
291 }
292
293 // We made it past the terminators without bailing out - we must have
294 // analyzed this branch successfully.
295 return false;
296}
297
298unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
299 int *BytesRemoved) const {
300 assert(!BytesRemoved && "code size not handled");
301
302 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
303 if (I == MBB.end())
304 return 0;
305
306 if (!isUncondBranchOpcode(I->getOpcode()) &&
307 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
308 return 0;
309
310 // Remove the branch.
311 I->eraseFromParent();
312
313 I = MBB.end();
314
315 if (I == MBB.begin()) return 1;
316 --I;
317 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
318 return 1;
319
320 // Remove the branch.
321 I->eraseFromParent();
322 return 2;
323}
324
325unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
326 MachineBasicBlock *TBB,
327 MachineBasicBlock *FBB,
328 ArrayRef<MachineOperand> Cond,
329 const DebugLoc &DL,
330 int *BytesAdded) const {
331 assert(!BytesAdded && "code size not handled");
332 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
333 int BOpc = !AFI->isThumbFunction()
334 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
335 int BccOpc = !AFI->isThumbFunction()
336 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
337 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
338
339 // Shouldn't be a fall through.
340 assert(TBB && "insertBranch must not be told to insert a fallthrough");
341 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
342 "ARM branch conditions have two or three components!");
343
344 // For conditional branches, we use addOperand to preserve CPSR flags.
345
346 if (!FBB) {
347 if (Cond.empty()) { // Unconditional branch?
348 if (isThumb)
349 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
350 else
351 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
352 } else if (Cond.size() == 2) {
353 BuildMI(&MBB, DL, get(BccOpc))
354 .addMBB(TBB)
355 .addImm(Cond[0].getImm())
356 .add(Cond[1]);
357 } else
358 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
359 return 1;
360 }
361
362 // Two-way conditional branch.
363 if (Cond.size() == 2)
364 BuildMI(&MBB, DL, get(BccOpc))
365 .addMBB(TBB)
366 .addImm(Cond[0].getImm())
367 .add(Cond[1]);
368 else if (Cond.size() == 3)
369 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
370 if (isThumb)
371 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
372 else
373 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
374 return 2;
375}
376
377bool ARMBaseInstrInfo::
378reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
379 if (Cond.size() == 2) {
380 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
381 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
382 return false;
383 }
384 return true;
385}
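// Note that only the two-element (conditional branch) form is reversible
// here: the condition code is simply flipped to its opposite. The
// three-element hardware-loop form returns true, i.e. "cannot reverse".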
386
387bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
388 if (MI.isBundle()) {
389 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
390 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
391 while (++I != E && I->isInsideBundle()) {
392 int PIdx = I->findFirstPredOperandIdx();
393 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
394 return true;
395 }
396 return false;
397 }
398
399 int PIdx = MI.findFirstPredOperandIdx();
400 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
401}
402
403std::string ARMBaseInstrInfo::createMIROperandComment(
404 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
405 const TargetRegisterInfo *TRI) const {
406
407 // First, let's see if there is a generic comment for this operand
408 std::string GenericComment =
409 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
410 if (!GenericComment.empty())
411 return GenericComment;
412
413 // If not, check if we have an immediate operand.
414 if (!Op.isImm())
415 return std::string();
416
417 // And print its corresponding condition code if the immediate is a
418 // predicate.
419 int FirstPredOp = MI.findFirstPredOperandIdx();
420 if (FirstPredOp != (int) OpIdx)
421 return std::string();
422
423 std::string CC = "CC::";
424 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
425 return CC;
426}
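// For example, an instruction predicated on ARMCC::EQ gets the MIR operand
// comment "CC::eq" attached to its predicate immediate by the code above.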
427
428bool ARMBaseInstrInfo::PredicateInstruction(
429 MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
430 unsigned Opc = MI.getOpcode();
431 if (isUncondBranchOpcode(Opc)) {
432 MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
433 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
434 .addImm(Pred[0].getImm())
435 .addReg(Pred[1].getReg());
436 return true;
437 }
438
439 int PIdx = MI.findFirstPredOperandIdx();
440 if (PIdx != -1) {
441 MachineOperand &PMO = MI.getOperand(PIdx);
442 PMO.setImm(Pred[0].getImm());
443 MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
444
445 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
446 // IT block. This affects how they are printed.
447 const MCInstrDesc &MCID = MI.getDesc();
448 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
449 assert(MCID.operands()[1].isOptionalDef() &&
450 "CPSR def isn't expected operand");
451 assert((MI.getOperand(1).isDead() ||
452 MI.getOperand(1).getReg() != ARM::CPSR) &&
453 "if conversion tried to stop defining used CPSR");
454 MI.getOperand(1).setReg(ARM::NoRegister);
455 }
456
457 return true;
458 }
459 return false;
460}
461
462bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
463 ArrayRef<MachineOperand> Pred2) const {
464 if (Pred1.size() > 2 || Pred2.size() > 2)
465 return false;
466
467 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
468 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
469 if (CC1 == CC2)
470 return true;
471
472 switch (CC1) {
473 default:
474 return false;
475 case ARMCC::AL:
476 return true;
477 case ARMCC::HS:
478 return CC2 == ARMCC::HI;
479 case ARMCC::LS:
480 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
481 case ARMCC::GE:
482 return CC2 == ARMCC::GT;
483 case ARMCC::LE:
484 return CC2 == ARMCC::LT;
485 }
486}
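// Example: GE subsumes GT, since any value satisfying GT also satisfies GE;
// the reverse query (Pred1 = GT, Pred2 = GE) falls through to the default
// case above and returns false.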
487
488bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
489 std::vector<MachineOperand> &Pred,
490 bool SkipDead) const {
491 bool Found = false;
492 for (const MachineOperand &MO : MI.operands()) {
493 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
494 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
495 if (ClobbersCPSR || IsCPSR) {
496
497 // Filter out T1 instructions that have a dead CPSR,
498 // allowing IT blocks to be generated containing T1 instructions
499 const MCInstrDesc &MCID = MI.getDesc();
500 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
501 SkipDead)
502 continue;
503
504 Pred.push_back(MO);
505 Found = true;
506 }
507 }
508
509 return Found;
510}
511
512bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
513 for (const auto &MO : MI.operands())
514 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
515 return true;
516 return false;
517}
518
519static bool isEligibleForITBlock(const MachineInstr *MI) {
520 switch (MI->getOpcode()) {
521 default: return true;
522 case ARM::tADC: // ADC (register) T1
523 case ARM::tADDi3: // ADD (immediate) T1
524 case ARM::tADDi8: // ADD (immediate) T2
525 case ARM::tADDrr: // ADD (register) T1
526 case ARM::tAND: // AND (register) T1
527 case ARM::tASRri: // ASR (immediate) T1
528 case ARM::tASRrr: // ASR (register) T1
529 case ARM::tBIC: // BIC (register) T1
530 case ARM::tEOR: // EOR (register) T1
531 case ARM::tLSLri: // LSL (immediate) T1
532 case ARM::tLSLrr: // LSL (register) T1
533 case ARM::tLSRri: // LSR (immediate) T1
534 case ARM::tLSRrr: // LSR (register) T1
535 case ARM::tMUL: // MUL T1
536 case ARM::tMVN: // MVN (register) T1
537 case ARM::tORR: // ORR (register) T1
538 case ARM::tROR: // ROR (register) T1
539 case ARM::tRSB: // RSB (immediate) T1
540 case ARM::tSBC: // SBC (register) T1
541 case ARM::tSUBi3: // SUB (immediate) T1
542 case ARM::tSUBi8: // SUB (immediate) T2
543 case ARM::tSUBrr: // SUB (register) T1
544 return !ARMBaseInstrInfo::isCPSRDefined(*MI);
545 }
546}
547
548/// isPredicable - Return true if the specified instruction can be predicated.
549/// By default, this returns true for every instruction with a
550/// PredicateOperand.
551bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
552 if (!MI.isPredicable())
553 return false;
554
555 if (MI.isBundle())
556 return false;
557
558 if (!isEligibleForITBlock(&MI))
559 return false;
560
561 const MachineFunction *MF = MI.getParent()->getParent();
562 const ARMFunctionInfo *AFI =
563 MF->getInfo<ARMFunctionInfo>();
564
565 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
566 // In their ARM encoding, they can't be encoded in a conditional form.
567 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
568 return false;
569
570 // Make indirect control flow changes unpredicable when SLS mitigation is
571 // enabled.
572 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
573 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
574 return false;
575 if (ST.hardenSlsBlr() && isIndirectCall(MI))
576 return false;
577
578 if (AFI->isThumb2Function()) {
579 if (getSubtarget().restrictIT())
580 return isV8EligibleForIT(&MI);
581 }
582
583 return true;
584}
585
586namespace llvm {
587
588template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
589 for (const MachineOperand &MO : MI->operands()) {
590 if (!MO.isReg() || MO.isUndef() || MO.isUse())
591 continue;
592 if (MO.getReg() != ARM::CPSR)
593 continue;
594 if (!MO.isDead())
595 return false;
596 }
597 // all definitions of CPSR are dead
598 return true;
599}
600
601} // end namespace llvm
602
603/// GetInstSize - Return the size of the specified MachineInstr.
604///
605unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
606 const MachineBasicBlock &MBB = *MI.getParent();
607 const MachineFunction *MF = MBB.getParent();
608 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
609
610 const MCInstrDesc &MCID = MI.getDesc();
611
612 switch (MI.getOpcode()) {
613 default:
614 // Return the size specified in .td file. If there's none, return 0, as we
615 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
616 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
617 // contrast to AArch64 instructions which have a default size of 4 bytes for
618 // example.
619 return MCID.getSize();
620 case TargetOpcode::BUNDLE:
621 return getInstBundleLength(MI);
622 case TargetOpcode::COPY:
624 return 4;
625 else
626 return 2;
627 case ARM::CONSTPOOL_ENTRY:
628 case ARM::JUMPTABLE_INSTS:
629 case ARM::JUMPTABLE_ADDRS:
630 case ARM::JUMPTABLE_TBB:
631 case ARM::JUMPTABLE_TBH:
632 // If this machine instr is a constant pool entry, its size is recorded as
633 // operand #2.
634 return MI.getOperand(2).getImm();
635 case ARM::SPACE:
636 return MI.getOperand(1).getImm();
637 case ARM::INLINEASM:
638 case ARM::INLINEASM_BR: {
639 // If this machine instr is an inline asm, measure it.
640 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
642 Size = alignTo(Size, 4);
643 return Size;
644 }
645 }
646}
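// For example, a 32-bit Thumb2 encoding reports 4 from its .td Size field
// and a 16-bit Thumb1 encoding reports 2, while CONSTPOOL_ENTRY and the
// jump-table pseudos read their byte size from operand #2 as handled above.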
647
648unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
649 unsigned Size = 0;
650 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
651 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
652 while (++I != E && I->isInsideBundle()) {
653 assert(!I->isBundle() && "No nested bundle!");
654 Size += getInstSizeInBytes(*I);
655 }
656 return Size;
657}
658
659void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
660 MachineBasicBlock::iterator I,
661 MCRegister DestReg, bool KillSrc,
662 const ARMSubtarget &Subtarget) const {
663 unsigned Opc = Subtarget.isThumb()
664 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
665 : ARM::MRS;
666
667 MachineInstrBuilder MIB =
668 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
669
670 // There is only 1 A/R class MRS instruction, and it always refers to
671 // APSR. However, there are lots of other possibilities on M-class cores.
672 if (Subtarget.isMClass())
673 MIB.addImm(0x800);
674
675 MIB.add(predOps(ARMCC::AL))
676 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
677}
678
679void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
680 MachineBasicBlock::iterator I,
681 MCRegister SrcReg, bool KillSrc,
682 const ARMSubtarget &Subtarget) const {
683 unsigned Opc = Subtarget.isThumb()
684 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
685 : ARM::MSR;
686
687 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
688
689 if (Subtarget.isMClass())
690 MIB.addImm(0x800);
691 else
692 MIB.addImm(8);
693
694 MIB.addReg(SrcReg, getKillRegState(KillSrc))
695 .add(predOps(ARMCC::AL))
696 .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
697}
698
700 MIB.addImm(ARMVCC::None);
701 MIB.addReg(0);
702 MIB.addReg(0); // tp_reg
703}
704
710
712 MIB.addImm(Cond);
713 MIB.addReg(ARM::VPR, RegState::Implicit);
714 MIB.addReg(0); // tp_reg
715}
716
718 unsigned Cond, unsigned Inactive) {
720 MIB.addReg(Inactive);
721}
722
723void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
724 MachineBasicBlock::iterator I,
725 const DebugLoc &DL, Register DestReg,
726 Register SrcReg, bool KillSrc,
727 bool RenamableDest,
728 bool RenamableSrc) const {
729 bool GPRDest = ARM::GPRRegClass.contains(DestReg);
730 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
731
732 if (GPRDest && GPRSrc) {
733 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
734 .addReg(SrcReg, getKillRegState(KillSrc))
735 .add(predOps(ARMCC::AL))
736 .add(condCodeOp());
737 return;
738 }
739
740 bool SPRDest = ARM::SPRRegClass.contains(DestReg);
741 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
742
743 unsigned Opc = 0;
744 if (SPRDest && SPRSrc)
745 Opc = ARM::VMOVS;
746 else if (GPRDest && SPRSrc)
747 Opc = ARM::VMOVRS;
748 else if (SPRDest && GPRSrc)
749 Opc = ARM::VMOVSR;
750 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
751 Opc = ARM::VMOVD;
752 else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
753 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
754
755 if (Opc) {
756 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
757 MIB.addReg(SrcReg, getKillRegState(KillSrc));
758 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
759 MIB.addReg(SrcReg, getKillRegState(KillSrc));
760 if (Opc == ARM::MVE_VORR)
761 addUnpredicatedMveVpredROp(MIB, DestReg);
762 else if (Opc != ARM::MQPRCopy)
763 MIB.add(predOps(ARMCC::AL));
764 return;
765 }
766
767 // Handle register classes that require multiple instructions.
768 unsigned BeginIdx = 0;
769 unsigned SubRegs = 0;
770 int Spacing = 1;
771
772 // Use VORRq when possible.
773 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
774 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
775 BeginIdx = ARM::qsub_0;
776 SubRegs = 2;
777 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
778 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
779 BeginIdx = ARM::qsub_0;
780 SubRegs = 4;
781 // Fall back to VMOVD.
782 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
783 Opc = ARM::VMOVD;
784 BeginIdx = ARM::dsub_0;
785 SubRegs = 2;
786 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
787 Opc = ARM::VMOVD;
788 BeginIdx = ARM::dsub_0;
789 SubRegs = 3;
790 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
791 Opc = ARM::VMOVD;
792 BeginIdx = ARM::dsub_0;
793 SubRegs = 4;
794 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
795 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
796 BeginIdx = ARM::gsub_0;
797 SubRegs = 2;
798 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
799 Opc = ARM::VMOVD;
800 BeginIdx = ARM::dsub_0;
801 SubRegs = 2;
802 Spacing = 2;
803 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
804 Opc = ARM::VMOVD;
805 BeginIdx = ARM::dsub_0;
806 SubRegs = 3;
807 Spacing = 2;
808 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
809 Opc = ARM::VMOVD;
810 BeginIdx = ARM::dsub_0;
811 SubRegs = 4;
812 Spacing = 2;
813 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
814 !Subtarget.hasFP64()) {
815 Opc = ARM::VMOVS;
816 BeginIdx = ARM::ssub_0;
817 SubRegs = 2;
818 } else if (SrcReg == ARM::CPSR) {
819 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
820 return;
821 } else if (DestReg == ARM::CPSR) {
822 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
823 return;
824 } else if (DestReg == ARM::VPR) {
825 assert(ARM::GPRRegClass.contains(SrcReg));
826 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
827 .addReg(SrcReg, getKillRegState(KillSrc))
829 return;
830 } else if (SrcReg == ARM::VPR) {
831 assert(ARM::GPRRegClass.contains(DestReg));
832 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
833 .addReg(SrcReg, getKillRegState(KillSrc))
835 return;
836 } else if (DestReg == ARM::FPSCR_NZCV) {
837 assert(ARM::GPRRegClass.contains(SrcReg));
838 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
839 .addReg(SrcReg, getKillRegState(KillSrc))
841 return;
842 } else if (SrcReg == ARM::FPSCR_NZCV) {
843 assert(ARM::GPRRegClass.contains(DestReg));
844 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
845 .addReg(SrcReg, getKillRegState(KillSrc))
847 return;
848 }
849
850 assert(Opc && "Impossible reg-to-reg copy");
851
852 const TargetRegisterInfo *TRI = &getRegisterInfo();
853 MachineInstrBuilder Mov;
854
855 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
856 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
857 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
858 Spacing = -Spacing;
859 }
860#ifndef NDEBUG
861 SmallSet<unsigned, 4> DstRegs;
862#endif
863 for (unsigned i = 0; i != SubRegs; ++i) {
864 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
865 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
866 assert(Dst && Src && "Bad sub-register");
867#ifndef NDEBUG
868 assert(!DstRegs.count(Src) && "destructive vector copy");
869 DstRegs.insert(Dst);
870#endif
871 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
872 // VORR (NEON or MVE) takes two source operands.
873 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
874 Mov.addReg(Src);
875 }
876 // MVE VORR takes predicate operands in place of an ordinary condition.
877 if (Opc == ARM::MVE_VORR)
878 addUnpredicatedMveVpredROp(Mov, Dst);
879 else
880 Mov = Mov.add(predOps(ARMCC::AL));
881 // MOVr can set CC.
882 if (Opc == ARM::MOVr)
883 Mov = Mov.add(condCodeOp());
884 }
885 // Add implicit super-register defs and kills to the last instruction.
886 Mov->addRegisterDefined(DestReg, TRI);
887 if (KillSrc)
888 Mov->addRegisterKilled(SrcReg, TRI);
889}
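// Illustrative example: with NEON available, copying one QQPR tuple into
// another emits one VORRq per Q sub-register (two in total), walking the
// tuple backwards when the destination's first sub-register overlaps the
// source and marking the full super-register defined on the last move.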
890
891std::optional<DestSourcePair>
892ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
893 // VMOVRRD is also a copy instruction, but it requires special
894 // handling. It is a more complex form of copy, and for that reason
895 // we do not consider it here. For recognition of such an
896 // instruction, the isExtractSubregLike MI interface function
897 // could be used.
898 // VORRq is considered a move only if its two source inputs are
899 // the same register.
900 if (!MI.isMoveReg() ||
901 (MI.getOpcode() == ARM::VORRq &&
902 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
903 return std::nullopt;
904 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
905}
906
907std::optional<ParamLoadedValue>
908ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
909 Register Reg) const {
910 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
911 Register DstReg = DstSrcPair->Destination->getReg();
912
913 // TODO: We don't handle cases where the forwarding reg is narrower/wider
914 // than the copy registers. Consider for example:
915 //
916 // s16 = VMOVS s0
917 // s17 = VMOVS s1
918 // call @callee(d0)
919 //
920 // We'd like to describe the call site value of d0 as d8, but this requires
921 // gathering and merging the descriptions for the two VMOVS instructions.
922 //
923 // We also don't handle the reverse situation, where the forwarding reg is
924 // narrower than the copy destination:
925 //
926 // d8 = VMOVD d0
927 // call @callee(s1)
928 //
929 // We need to produce a fragment description (the call site value of s1 is
930 // /not/ just d8).
931 if (DstReg != Reg)
932 return std::nullopt;
933 }
934 return TargetInstrInfo::describeLoadedValue(MI, Reg);
935}
936
938 unsigned Reg,
939 unsigned SubIdx,
940 RegState State) const {
941 if (!SubIdx)
942 return MIB.addReg(Reg, State);
943
945 return MIB.addReg(getRegisterInfo().getSubReg(Reg, SubIdx), State);
946 return MIB.addReg(Reg, State, SubIdx);
947}
948
949void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
950 MachineBasicBlock::iterator I,
951 Register SrcReg, bool isKill, int FI,
952 const TargetRegisterClass *RC,
953 Register VReg,
954 MachineInstr::MIFlag Flags) const {
955 MachineFunction &MF = *MBB.getParent();
956 MachineFrameInfo &MFI = MF.getFrameInfo();
957 Align Alignment = MFI.getObjectAlign(FI);
959
960 MachineMemOperand *MMO = MF.getMachineMemOperand(
961 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
962 MFI.getObjectSize(FI), Alignment);
963
964 switch (TRI.getSpillSize(*RC)) {
965 case 2:
966 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
967 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
968 .addReg(SrcReg, getKillRegState(isKill))
969 .addFrameIndex(FI)
970 .addImm(0)
971 .addMemOperand(MMO)
973 } else
974 llvm_unreachable("Unknown reg class!");
975 break;
976 case 4:
977 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
978 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
979 .addReg(SrcReg, getKillRegState(isKill))
980 .addFrameIndex(FI)
981 .addImm(0)
982 .addMemOperand(MMO)
984 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
985 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
986 .addReg(SrcReg, getKillRegState(isKill))
987 .addFrameIndex(FI)
988 .addImm(0)
989 .addMemOperand(MMO)
991 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
992 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
993 .addReg(SrcReg, getKillRegState(isKill))
994 .addFrameIndex(FI)
995 .addImm(0)
996 .addMemOperand(MMO)
998 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
999 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_FPSCR_NZCVQC_off))
1000 .addReg(SrcReg, getKillRegState(isKill))
1001 .addFrameIndex(FI)
1002 .addImm(0)
1003 .addMemOperand(MMO)
1005 } else
1006 llvm_unreachable("Unknown reg class!");
1007 break;
1008 case 8:
1009 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1010 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1011 .addReg(SrcReg, getKillRegState(isKill))
1012 .addFrameIndex(FI)
1013 .addImm(0)
1014 .addMemOperand(MMO)
1016 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1017 if (Subtarget.hasV5TEOps()) {
1018 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1019 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
1020 AddDReg(MIB, SrcReg, ARM::gsub_1, {});
1021 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1023 } else {
1024 // Fallback to STM instruction, which has existed since the dawn of
1025 // time.
1026 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1027 .addFrameIndex(FI)
1028 .addMemOperand(MMO)
1030 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
1031 AddDReg(MIB, SrcReg, ARM::gsub_1, {});
1032 }
1033 } else
1034 llvm_unreachable("Unknown reg class!");
1035 break;
1036 case 16:
1037 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1038 // Use aligned spills if the stack can be realigned.
1039 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1040 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1041 .addFrameIndex(FI)
1042 .addImm(16)
1043 .addReg(SrcReg, getKillRegState(isKill))
1044 .addMemOperand(MMO)
1046 } else {
1047 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1048 .addReg(SrcReg, getKillRegState(isKill))
1049 .addFrameIndex(FI)
1050 .addMemOperand(MMO)
1052 }
1053 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1054 Subtarget.hasMVEIntegerOps()) {
1055 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1056 MIB.addReg(SrcReg, getKillRegState(isKill))
1057 .addFrameIndex(FI)
1058 .addImm(0)
1059 .addMemOperand(MMO);
1061 } else
1062 llvm_unreachable("Unknown reg class!");
1063 break;
1064 case 24:
1065 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1066 // Use aligned spills if the stack can be realigned.
1067 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1068 Subtarget.hasNEON()) {
1069 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1070 .addFrameIndex(FI)
1071 .addImm(16)
1072 .addReg(SrcReg, getKillRegState(isKill))
1073 .addMemOperand(MMO)
1075 } else {
1077 get(ARM::VSTMDIA))
1078 .addFrameIndex(FI)
1080 .addMemOperand(MMO);
1081 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1082 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
1083 AddDReg(MIB, SrcReg, ARM::dsub_2, {});
1084 }
1085 } else
1086 llvm_unreachable("Unknown reg class!");
1087 break;
1088 case 32:
1089 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1090 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1091 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1092 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1093 Subtarget.hasNEON()) {
1094 // FIXME: It's possible to only store part of the QQ register if the
1095 // spilled def has a sub-register index.
1096 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1097 .addFrameIndex(FI)
1098 .addImm(16)
1099 .addReg(SrcReg, getKillRegState(isKill))
1100 .addMemOperand(MMO)
1102 } else if (Subtarget.hasMVEIntegerOps()) {
1103 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1104 .addReg(SrcReg, getKillRegState(isKill))
1105 .addFrameIndex(FI)
1106 .addMemOperand(MMO);
1107 } else {
1109 get(ARM::VSTMDIA))
1110 .addFrameIndex(FI)
1112 .addMemOperand(MMO);
1113 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1114 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
1115 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, {});
1116 AddDReg(MIB, SrcReg, ARM::dsub_3, {});
1117 }
1118 } else
1119 llvm_unreachable("Unknown reg class!");
1120 break;
1121 case 64:
1122 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1123 Subtarget.hasMVEIntegerOps()) {
1124 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1125 .addReg(SrcReg, getKillRegState(isKill))
1126 .addFrameIndex(FI)
1127 .addMemOperand(MMO);
1128 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1129 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1130 .addFrameIndex(FI)
1132 .addMemOperand(MMO);
1133 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
1134 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, {});
1135 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, {});
1136 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, {});
1137 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, {});
1138 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, {});
1139 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, {});
1140 AddDReg(MIB, SrcReg, ARM::dsub_7, {});
1141 } else
1142 llvm_unreachable("Unknown reg class!");
1143 break;
1144 default:
1145 llvm_unreachable("Unknown reg class!");
1146 }
1147}
1148
1149Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1150 int &FrameIndex) const {
1151 switch (MI.getOpcode()) {
1152 default: break;
1153 case ARM::STRrs:
1154 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1155 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1156 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1157 MI.getOperand(3).getImm() == 0) {
1158 FrameIndex = MI.getOperand(1).getIndex();
1159 return MI.getOperand(0).getReg();
1160 }
1161 break;
1162 case ARM::STRi12:
1163 case ARM::t2STRi12:
1164 case ARM::tSTRspi:
1165 case ARM::VSTRD:
1166 case ARM::VSTRS:
1167 case ARM::VSTRH:
1168 case ARM::VSTR_P0_off:
1169 case ARM::VSTR_FPSCR_NZCVQC_off:
1170 case ARM::MVE_VSTRWU32:
1171 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1172 MI.getOperand(2).getImm() == 0) {
1173 FrameIndex = MI.getOperand(1).getIndex();
1174 return MI.getOperand(0).getReg();
1175 }
1176 break;
1177 case ARM::VST1q64:
1178 case ARM::VST1d64TPseudo:
1179 case ARM::VST1d64QPseudo:
1180 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1181 FrameIndex = MI.getOperand(0).getIndex();
1182 return MI.getOperand(2).getReg();
1183 }
1184 break;
1185 case ARM::VSTMQIA:
1186 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1187 FrameIndex = MI.getOperand(1).getIndex();
1188 return MI.getOperand(0).getReg();
1189 }
1190 break;
1191 case ARM::MQQPRStore:
1192 case ARM::MQQQQPRStore:
1193 if (MI.getOperand(1).isFI()) {
1194 FrameIndex = MI.getOperand(1).getIndex();
1195 return MI.getOperand(0).getReg();
1196 }
1197 break;
1198 }
1199
1200 return 0;
1201}
1202
1203bool ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1204 int &FrameIndex) const {
1205 SmallVector<const MachineMemOperand *, 1> Accesses;
1206 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1207 Accesses.size() == 1) {
1208 FrameIndex =
1209 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1210 ->getFrameIndex();
1211 return true;
1212 }
1213 return false;
1214}
1215
1216void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
1217 MachineBasicBlock::iterator I,
1218 Register DestReg, int FI,
1219 const TargetRegisterClass *RC,
1220 Register VReg, unsigned SubReg,
1221 MachineInstr::MIFlag Flags) const {
1222 DebugLoc DL;
1223 if (I != MBB.end()) DL = I->getDebugLoc();
1224 MachineFunction &MF = *MBB.getParent();
1225 MachineFrameInfo &MFI = MF.getFrameInfo();
1226 const Align Alignment = MFI.getObjectAlign(FI);
1227 MachineMemOperand *MMO = MF.getMachineMemOperand(
1228 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1229 MFI.getObjectSize(FI), Alignment);
1230
1232 switch (TRI.getSpillSize(*RC)) {
1233 case 2:
1234 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1235 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1236 .addFrameIndex(FI)
1237 .addImm(0)
1238 .addMemOperand(MMO)
1240 } else
1241 llvm_unreachable("Unknown reg class!");
1242 break;
1243 case 4:
1244 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1245 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1246 .addFrameIndex(FI)
1247 .addImm(0)
1248 .addMemOperand(MMO)
1250 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1251 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1252 .addFrameIndex(FI)
1253 .addImm(0)
1254 .addMemOperand(MMO)
1256 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1257 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1258 .addFrameIndex(FI)
1259 .addImm(0)
1260 .addMemOperand(MMO)
1262 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
1263 BuildMI(MBB, I, DL, get(ARM::VLDR_FPSCR_NZCVQC_off), DestReg)
1264 .addFrameIndex(FI)
1265 .addImm(0)
1266 .addMemOperand(MMO)
1268 } else
1269 llvm_unreachable("Unknown reg class!");
1270 break;
1271 case 8:
1272 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1273 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1274 .addFrameIndex(FI)
1275 .addImm(0)
1276 .addMemOperand(MMO)
1278 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1280
1281 if (Subtarget.hasV5TEOps()) {
1282 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1283 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
1284 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
1285 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1287 } else {
1288 // Fallback to LDM instruction, which has existed since the dawn of
1289 // time.
1290 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1291 .addFrameIndex(FI)
1292 .addMemOperand(MMO)
1294 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
1295 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
1296 }
1297
1298 if (DestReg.isPhysical())
1299 MIB.addReg(DestReg, RegState::ImplicitDefine);
1300 } else
1301 llvm_unreachable("Unknown reg class!");
1302 break;
1303 case 16:
1304 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1305 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1306 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1307 .addFrameIndex(FI)
1308 .addImm(16)
1309 .addMemOperand(MMO)
1311 } else {
1312 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1313 .addFrameIndex(FI)
1314 .addMemOperand(MMO)
1316 }
1317 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1318 Subtarget.hasMVEIntegerOps()) {
1319 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1320 MIB.addFrameIndex(FI)
1321 .addImm(0)
1322 .addMemOperand(MMO);
1324 } else
1325 llvm_unreachable("Unknown reg class!");
1326 break;
1327 case 24:
1328 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1329 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1330 Subtarget.hasNEON()) {
1331 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1332 .addFrameIndex(FI)
1333 .addImm(16)
1334 .addMemOperand(MMO)
1336 } else {
1337 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1338 .addFrameIndex(FI)
1339 .addMemOperand(MMO)
1341 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1342 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1343 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1344 if (DestReg.isPhysical())
1345 MIB.addReg(DestReg, RegState::ImplicitDefine);
1346 }
1347 } else
1348 llvm_unreachable("Unknown reg class!");
1349 break;
1350 case 32:
1351 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1352 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1353 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1354 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1355 Subtarget.hasNEON()) {
1356 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1357 .addFrameIndex(FI)
1358 .addImm(16)
1359 .addMemOperand(MMO)
1361 } else if (Subtarget.hasMVEIntegerOps()) {
1362 BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1363 .addFrameIndex(FI)
1364 .addMemOperand(MMO);
1365 } else {
1366 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1367 .addFrameIndex(FI)
1369 .addMemOperand(MMO);
1370 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1371 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1372 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1373 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
1374 if (DestReg.isPhysical())
1375 MIB.addReg(DestReg, RegState::ImplicitDefine);
1376 }
1377 } else
1378 llvm_unreachable("Unknown reg class!");
1379 break;
1380 case 64:
1381 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1382 Subtarget.hasMVEIntegerOps()) {
1383 BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1384 .addFrameIndex(FI)
1385 .addMemOperand(MMO);
1386 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1387 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1388 .addFrameIndex(FI)
1390 .addMemOperand(MMO);
1391 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
1392 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
1393 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
1394 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
1395 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead);
1396 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead);
1397 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead);
1398 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead);
1399 if (DestReg.isPhysical())
1400 MIB.addReg(DestReg, RegState::ImplicitDefine);
1401 } else
1402 llvm_unreachable("Unknown reg class!");
1403 break;
1404 default:
1405 llvm_unreachable("Unknown regclass!");
1406 }
1407}
1408
1409Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1410 int &FrameIndex) const {
1411 switch (MI.getOpcode()) {
1412 default: break;
1413 case ARM::LDRrs:
1414 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1415 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1416 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1417 MI.getOperand(3).getImm() == 0) {
1418 FrameIndex = MI.getOperand(1).getIndex();
1419 return MI.getOperand(0).getReg();
1420 }
1421 break;
1422 case ARM::LDRi12:
1423 case ARM::t2LDRi12:
1424 case ARM::tLDRspi:
1425 case ARM::VLDRD:
1426 case ARM::VLDRS:
1427 case ARM::VLDRH:
1428 case ARM::VLDR_P0_off:
1429 case ARM::VLDR_FPSCR_NZCVQC_off:
1430 case ARM::MVE_VLDRWU32:
1431 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1432 MI.getOperand(2).getImm() == 0) {
1433 FrameIndex = MI.getOperand(1).getIndex();
1434 return MI.getOperand(0).getReg();
1435 }
1436 break;
1437 case ARM::VLD1q64:
1438 case ARM::VLD1d8TPseudo:
1439 case ARM::VLD1d16TPseudo:
1440 case ARM::VLD1d32TPseudo:
1441 case ARM::VLD1d64TPseudo:
1442 case ARM::VLD1d8QPseudo:
1443 case ARM::VLD1d16QPseudo:
1444 case ARM::VLD1d32QPseudo:
1445 case ARM::VLD1d64QPseudo:
1446 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1447 FrameIndex = MI.getOperand(1).getIndex();
1448 return MI.getOperand(0).getReg();
1449 }
1450 break;
1451 case ARM::VLDMQIA:
1452 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1453 FrameIndex = MI.getOperand(1).getIndex();
1454 return MI.getOperand(0).getReg();
1455 }
1456 break;
1457 case ARM::MQQPRLoad:
1458 case ARM::MQQQQPRLoad:
1459 if (MI.getOperand(1).isFI()) {
1460 FrameIndex = MI.getOperand(1).getIndex();
1461 return MI.getOperand(0).getReg();
1462 }
1463 break;
1464 }
1465
1466 return 0;
1467}
1468
1469bool ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1470 int &FrameIndex) const {
1471 SmallVector<const MachineMemOperand *, 1> Accesses;
1472 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1473 Accesses.size() == 1) {
1474 FrameIndex =
1475 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1476 ->getFrameIndex();
1477 return true;
1478 }
1479 return false;
1480}
1481
1482/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1483/// depending on whether the result is used.
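/// Illustrative expansion (assuming the updated base registers are live):
/// MEMCPY becomes an LDMIA_UPD from the load base into the sorted scratch
/// registers followed by an STMIA_UPD to the store base, with the Thumb1/
/// Thumb2 variants (tLDMIA_UPD/t2LDMIA_UPD, etc.) chosen per subtarget; a
/// dead updated base falls back to the plain LDMIA/STMIA form.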
1484void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1485 bool isThumb1 = Subtarget.isThumb1Only();
1486 bool isThumb2 = Subtarget.isThumb2();
1487 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1488
1489 DebugLoc dl = MI->getDebugLoc();
1490 MachineBasicBlock *BB = MI->getParent();
1491
1492 MachineInstrBuilder LDM, STM;
1493 if (isThumb1 || !MI->getOperand(1).isDead()) {
1494 MachineOperand LDWb(MI->getOperand(1));
1495 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1496 : isThumb1 ? ARM::tLDMIA_UPD
1497 : ARM::LDMIA_UPD))
1498 .add(LDWb);
1499 } else {
1500 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1501 }
1502
1503 if (isThumb1 || !MI->getOperand(0).isDead()) {
1504 MachineOperand STWb(MI->getOperand(0));
1505 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1506 : isThumb1 ? ARM::tSTMIA_UPD
1507 : ARM::STMIA_UPD))
1508 .add(STWb);
1509 } else {
1510 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1511 }
1512
1513 MachineOperand LDBase(MI->getOperand(3));
1514 LDM.add(LDBase).add(predOps(ARMCC::AL));
1515
1516 MachineOperand STBase(MI->getOperand(2));
1517 STM.add(STBase).add(predOps(ARMCC::AL));
1518
1519 // Sort the scratch registers into ascending order.
1520 const TargetRegisterInfo &TRI = getRegisterInfo();
1521 SmallVector<unsigned, 6> ScratchRegs;
1522 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1523 ScratchRegs.push_back(MO.getReg());
1524 llvm::sort(ScratchRegs,
1525 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1526 return TRI.getEncodingValue(Reg1) <
1527 TRI.getEncodingValue(Reg2);
1528 });
1529
1530 for (const auto &Reg : ScratchRegs) {
1531 LDM.addReg(Reg, RegState::Define);
1532 STM.addReg(Reg, RegState::Kill);
1533 }
1534
1535 BB->erase(MI);
1536}
1537
1538bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1539 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1540 expandLoadStackGuard(MI);
1541 MI.getParent()->erase(MI);
1542 return true;
1543 }
1544
1545 if (MI.getOpcode() == ARM::MEMCPY) {
1546 expandMEMCPY(MI);
1547 return true;
1548 }
1549
1550 // This hook gets to expand COPY instructions before they become
1551 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1552 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1553 // changed into a VORR that can go down the NEON pipeline.
1554 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1555 return false;
1556
1557 // Look for a copy between even S-registers. That is where we keep floats
1558 // when using NEON v2f32 instructions for f32 arithmetic.
1559 Register DstRegS = MI.getOperand(0).getReg();
1560 Register SrcRegS = MI.getOperand(1).getReg();
1561 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1562 return false;
1563
1564 const TargetRegisterInfo *TRI = &getRegisterInfo();
1565 MCRegister DstRegD =
1566 TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, &ARM::DPRRegClass);
1567 MCRegister SrcRegD =
1568 TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, &ARM::DPRRegClass);
1569 if (!DstRegD || !SrcRegD)
1570 return false;
1571
1572 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1573 // legal if the COPY already defines the full DstRegD, and it isn't a
1574 // sub-register insertion.
1575 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1576 return false;
1577
1578 // A dead copy shouldn't show up here, but reject it just in case.
1579 if (MI.getOperand(0).isDead())
1580 return false;
1581
1582 // All clear, widen the COPY.
1583 LLVM_DEBUG(dbgs() << "widening: " << MI);
1584 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1585
1586 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1587 // or some other super-register.
1588 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1589 if (ImpDefIdx != -1)
1590 MI.removeOperand(ImpDefIdx);
1591
1592 // Change the opcode and operands.
1593 MI.setDesc(get(ARM::VMOVD));
1594 MI.getOperand(0).setReg(DstRegD);
1595 MI.getOperand(1).setReg(SrcRegD);
1596 MIB.add(predOps(ARMCC::AL));
1597
1598 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1599 // register scavenger and machine verifier, so we need to indicate that we
1600 // are reading an undefined value from SrcRegD, but a proper value from
1601 // SrcRegS.
1602 MI.getOperand(1).setIsUndef();
1603 MIB.addReg(SrcRegS, RegState::Implicit);
1604
1605 // SrcRegD may actually contain an unrelated value in the ssub_1
1606 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1607 if (MI.getOperand(1).isKill()) {
1608 MI.getOperand(1).setIsKill(false);
1609 MI.addRegisterKilled(SrcRegS, TRI, true);
1610 }
1611
1612 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1613 return true;
1614}
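// Illustrative example of the widening above: an "s0 = COPY s2" between even
// S-registers becomes "d0 = VMOVD d1", with the D source marked undef and s2
// added as an implicit use so the scavenger and verifier still see a defined
// value, and only the ssub_0 half of the source D-register is killed.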
1615
1616/// Create a copy of a const pool value. Update CPI to the new index and return
1617/// the label UID.
1618static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1619 MachineConstantPool *MCP = MF.getConstantPool();
1620 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1621
1622 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1623 assert(MCPE.isMachineConstantPoolEntry() &&
1624 "Expecting a machine constantpool entry!");
1625 ARMConstantPoolValue *ACPV =
1626 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1627
1628 unsigned PCLabelId = AFI->createPICLabelUId();
1629 ARMConstantPoolValue *NewCPV = nullptr;
1630
1631 // FIXME: The below assumes PIC relocation model and that the function
1632 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1633 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1634 // instructions, so that's probably OK, but is PIC always correct when
1635 // we get here?
1636 if (ACPV->isGlobalValue())
1638 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1639 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1640 else if (ACPV->isExtSymbol())
1643 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1644 else if (ACPV->isBlockAddress())
1646 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1648 else if (ACPV->isLSDA())
1649 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1650 ARMCP::CPLSDA, 4);
1651 else if (ACPV->isMachineBasicBlock())
1652 NewCPV = ARMConstantPoolMBB::
1654 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1655 else
1656 llvm_unreachable("Unexpected ARM constantpool value type!!");
1657 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1658 return PCLabelId;
1659}
1660
1661void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1662 MachineBasicBlock::iterator I,
1663 Register DestReg, unsigned SubIdx,
1664 const MachineInstr &Orig) const {
1665 unsigned Opcode = Orig.getOpcode();
1666 switch (Opcode) {
1667 default: {
1668 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1669 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1670 MBB.insert(I, MI);
1671 break;
1672 }
1673 case ARM::tLDRpci_pic:
1674 case ARM::t2LDRpci_pic: {
1675 MachineFunction &MF = *MBB.getParent();
1676 unsigned CPI = Orig.getOperand(1).getIndex();
1677 unsigned PCLabelId = duplicateCPV(MF, CPI);
1678 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1680 .addImm(PCLabelId)
1681 .cloneMemRefs(Orig);
1682 break;
1683 }
1684 }
1685}
1686
1687MachineInstr &
1688ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1689 MachineBasicBlock::iterator InsertBefore,
1690 const MachineInstr &Orig) const {
1691 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1692 MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1693 for (;;) {
1694 switch (I->getOpcode()) {
1695 case ARM::tLDRpci_pic:
1696 case ARM::t2LDRpci_pic: {
1697 MachineFunction &MF = *MBB.getParent();
1698 unsigned CPI = I->getOperand(1).getIndex();
1699 unsigned PCLabelId = duplicateCPV(MF, CPI);
1700 I->getOperand(1).setIndex(CPI);
1701 I->getOperand(2).setImm(PCLabelId);
1702 break;
1703 }
1704 }
1705 if (!I->isBundledWithSucc())
1706 break;
1707 ++I;
1708 }
1709 return Cloned;
1710}
1711
1712bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1713 const MachineInstr &MI1,
1714 const MachineRegisterInfo *MRI) const {
1715 unsigned Opcode = MI0.getOpcode();
1716 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1717 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1718 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1719 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1720 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1721 Opcode == ARM::t2MOV_ga_pcrel) {
1722 if (MI1.getOpcode() != Opcode)
1723 return false;
1724 if (MI0.getNumOperands() != MI1.getNumOperands())
1725 return false;
1726
1727 const MachineOperand &MO0 = MI0.getOperand(1);
1728 const MachineOperand &MO1 = MI1.getOperand(1);
1729 if (MO0.getOffset() != MO1.getOffset())
1730 return false;
1731
1732 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1733 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1734 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1735 Opcode == ARM::t2MOV_ga_pcrel)
1736 // Ignore the PC labels.
1737 return MO0.getGlobal() == MO1.getGlobal();
1738
1739 const MachineFunction *MF = MI0.getParent()->getParent();
1740 const MachineConstantPool *MCP = MF->getConstantPool();
1741 int CPI0 = MO0.getIndex();
1742 int CPI1 = MO1.getIndex();
1743 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1744 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1745 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1746 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1747 if (isARMCP0 && isARMCP1) {
1748 ARMConstantPoolValue *ACPV0 =
1749 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1750 ARMConstantPoolValue *ACPV1 =
1751 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1752 return ACPV0->hasSameValue(ACPV1);
1753 } else if (!isARMCP0 && !isARMCP1) {
1754 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1755 }
1756 return false;
1757 } else if (Opcode == ARM::PICLDR) {
1758 if (MI1.getOpcode() != Opcode)
1759 return false;
1760 if (MI0.getNumOperands() != MI1.getNumOperands())
1761 return false;
1762
1763 Register Addr0 = MI0.getOperand(1).getReg();
1764 Register Addr1 = MI1.getOperand(1).getReg();
1765 if (Addr0 != Addr1) {
1766 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1767 return false;
1768
1769 // This assumes SSA form.
1770 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1771 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1772 // Check if the loaded value, e.g. a constant-pool entry or a global
1773 // address, is the same.
1774 if (!produceSameValue(*Def0, *Def1, MRI))
1775 return false;
1776 }
1777
1778 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1779 // %12 = PICLDR %11, 0, 14, %noreg
1780 const MachineOperand &MO0 = MI0.getOperand(i);
1781 const MachineOperand &MO1 = MI1.getOperand(i);
1782 if (!MO0.isIdenticalTo(MO1))
1783 return false;
1784 }
1785 return true;
1786 }
1787
1788 return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1789}
1790
1791/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1792/// determine if two loads are loading from the same base address. It should
1793/// only return true if the base pointers are the same and the only difference
1794/// between the two addresses is the offset. It also returns the offsets by
1795/// reference.
1796///
1797/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1798/// is permanently disabled.
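/// For example, two t2LDRi12 loads from the same base register and chain
/// with constant offsets 8 and 12 report Offset1 = 8 and Offset2 = 12 and
/// return true; loads whose offset operand is not a constant do not match.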
1799bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1800 int64_t &Offset1,
1801 int64_t &Offset2) const {
1802 // Don't worry about Thumb: just ARM and Thumb2.
1803 if (Subtarget.isThumb1Only()) return false;
1804
1805 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1806 return false;
1807
1808 auto IsLoadOpcode = [&](unsigned Opcode) {
1809 switch (Opcode) {
1810 default:
1811 return false;
1812 case ARM::LDRi12:
1813 case ARM::LDRBi12:
1814 case ARM::LDRD:
1815 case ARM::LDRH:
1816 case ARM::LDRSB:
1817 case ARM::LDRSH:
1818 case ARM::VLDRD:
1819 case ARM::VLDRS:
1820 case ARM::t2LDRi8:
1821 case ARM::t2LDRBi8:
1822 case ARM::t2LDRDi8:
1823 case ARM::t2LDRSHi8:
1824 case ARM::t2LDRi12:
1825 case ARM::t2LDRBi12:
1826 case ARM::t2LDRSHi12:
1827 return true;
1828 }
1829 };
1830
1831 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1832 !IsLoadOpcode(Load2->getMachineOpcode()))
1833 return false;
1834
1835 // Check if base addresses and chain operands match.
1836 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1837 Load1->getOperand(4) != Load2->getOperand(4))
1838 return false;
1839
1840 // Index should be Reg0.
1841 if (Load1->getOperand(3) != Load2->getOperand(3))
1842 return false;
1843
1844 // Determine the offsets.
1845 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1846 isa<ConstantSDNode>(Load2->getOperand(1))) {
1847 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1848 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1849 return true;
1850 }
1851
1852 return false;
1853}
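// Illustration of the contract above (register and offset values are
// placeholders): two LDRi12 nodes loading through the same base pointer and
// chain, one at immediate offset 4 and one at offset 8, make this return true
// with Offset1 = 4 and Offset2 = 8; if either offset operand is not a
// ConstantSDNode the function conservatively returns false.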
1854
1855/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1856/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1857/// be scheduled together. On some targets if two loads are loading from
1858/// addresses in the same cache line, it's better if they are scheduled
1859/// together. This function takes two integers that represent the load offsets
1860/// from the common base address. It returns true if it decides it's desirable
1861/// to schedule the two loads together. "NumLoads" is the number of loads that
1862/// have already been scheduled after Load1.
1863///
1864/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1865/// is permanently disabled.
1866bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1867 int64_t Offset1, int64_t Offset2,
1868 unsigned NumLoads) const {
1869 // Don't worry about Thumb: just ARM and Thumb2.
1870 if (Subtarget.isThumb1Only()) return false;
1871
1872 assert(Offset2 > Offset1);
1873
1874 if ((Offset2 - Offset1) / 8 > 64)
1875 return false;
1876
1877 // Check if the machine opcodes are different. If they are different
1878 // then we consider them not to be loading from the same base address,
1879 // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
1880 // In this case, they are considered to be the same because they are different
1881 // encoding forms of the same basic instruction.
1882 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1883 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1884 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1885 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1886 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1887 return false; // FIXME: overly conservative?
1888
1889 // Four loads in a row should be sufficient.
1890 if (NumLoads >= 3)
1891 return false;
1892
1893 return true;
1894}
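// Illustration (hypothetical offsets): with Offset1 = 0, Offset2 = 8 and
// NumLoads = 1, the distance check (8 - 0) / 8 = 1 <= 64 passes and fewer
// than four loads have been clustered, so this returns true for matching
// opcodes; offsets more than 512 bytes apart always fail the distance check.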
1895
1896bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1897 const MachineBasicBlock *MBB,
1898 const MachineFunction &MF) const {
1899 // Debug info is never a scheduling boundary. It's necessary to be explicit
1900 // due to the special treatment of IT instructions below, otherwise a
1901 // dbg_value followed by an IT will result in the IT instruction being
1902 // considered a scheduling hazard, which is wrong. It should be the actual
1903 // instruction preceding the dbg_value instruction(s), just like it is
1904 // when debug info is not present.
1905 if (MI.isDebugInstr())
1906 return false;
1907
1908 // Terminators and labels can't be scheduled around.
1909 if (MI.isTerminator() || MI.isPosition())
1910 return true;
1911
1912 // INLINEASM_BR can jump to another block
1913 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
1914 return true;
1915
1916 if (isSEHInstruction(MI))
1917 return true;
1918
1919 // Treat the start of the IT block as a scheduling boundary, but schedule
1920 // t2IT along with all instructions following it.
1921 // FIXME: This is a big hammer. But the alternative is to add all potential
1922 // true and anti dependencies to IT block instructions as implicit operands
1923 // to the t2IT instruction. The added compile time and complexity does not
1924 // seem worth it.
1925 MachineBasicBlock::const_iterator I = MI;
1926 // Make sure to skip any debug instructions
1927 while (++I != MBB->end() && I->isDebugInstr())
1928 ;
1929 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1930 return true;
1931
1932 // Don't attempt to schedule around any instruction that defines
1933 // a stack-oriented pointer, as it's unlikely to be profitable. This
1934 // saves compile time, because it doesn't require every single
1935 // stack slot reference to depend on the instruction that does the
1936 // modification.
1937 // Calls don't actually change the stack pointer, even if they have imp-defs.
1938 // No ARM calling conventions change the stack pointer. (X86 calling
1939 // conventions sometimes do).
1940 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
1941 return true;
1942
1943 return false;
1944}
1945
1946bool ARMBaseInstrInfo::
1947isProfitableToIfCvt(MachineBasicBlock &MBB,
1948 unsigned NumCycles, unsigned ExtraPredCycles,
1949 BranchProbability Probability) const {
1950 if (!NumCycles)
1951 return false;
1952
1953 // If we are optimizing for size, see if the branch in the predecessor can be
1954 // lowered to cbn?z by the constant island lowering pass, and return false if
1955 // so. This results in a shorter instruction sequence.
1956 if (MBB.getParent()->getFunction().hasOptSize()) {
1957 MachineBasicBlock *Pred = *MBB.pred_begin();
1958 if (!Pred->empty()) {
1959 MachineInstr *LastMI = &*Pred->rbegin();
1960 if (LastMI->getOpcode() == ARM::t2Bcc) {
1962 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
1963 if (CmpMI)
1964 return false;
1965 }
1966 }
1967 }
1968 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1969 MBB, 0, 0, Probability);
1970}
1971
1972bool ARMBaseInstrInfo::
1973isProfitableToIfCvt(MachineBasicBlock &TBB,
1974 unsigned TCycles, unsigned TExtra,
1975 MachineBasicBlock &FBB,
1976 unsigned FCycles, unsigned FExtra,
1977 BranchProbability Probability) const {
1978 if (!TCycles)
1979 return false;
1980
1981 // In Thumb code we often end up trading one branch for an IT block, and
1982 // if the block has to be cloned the extra instructions can increase code
1983 // size. Prevent blocks with multiple predecessors from being ifcvted to
1984 // avoid this cloning.
1985 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
1986 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
1987 return false;
1988 }
1989
1990 // Attempt to estimate the relative costs of predication versus branching.
1991 // Here we scale up each component of UnpredCost to avoid precision issues when
1992 // scaling TCycles/FCycles by Probability.
1993 const unsigned ScalingUpFactor = 1024;
1994
1995 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1996 unsigned UnpredCost;
1997 if (!Subtarget.hasBranchPredictor()) {
1998 // When we don't have a branch predictor it's always cheaper to not take a
1999 // branch than take it, so we have to take that into account.
2000 unsigned NotTakenBranchCost = 1;
2001 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2002 unsigned TUnpredCycles, FUnpredCycles;
2003 if (!FCycles) {
2004 // Triangle: TBB is the fallthrough
2005 TUnpredCycles = TCycles + NotTakenBranchCost;
2006 FUnpredCycles = TakenBranchCost;
2007 } else {
2008 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2009 TUnpredCycles = TCycles + TakenBranchCost;
2010 FUnpredCycles = FCycles + NotTakenBranchCost;
2011 // The branch at the end of FBB will disappear when it's predicated, so
2012 // discount it from PredCost.
2013 PredCost -= 1 * ScalingUpFactor;
2014 }
2015 // The total cost is the cost of each path scaled by their probabilities.
2016 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2017 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2018 UnpredCost = TUnpredCost + FUnpredCost;
2019 // When predicating, assume that the first IT can be folded away but later
2020 // ones cost one cycle each.
2021 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2022 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2023 }
2024 } else {
2025 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2026 unsigned FUnpredCost =
2027 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2028 UnpredCost = TUnpredCost + FUnpredCost;
2029 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2030 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2031 }
2032
2033 return PredCost <= UnpredCost;
2034}
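// Worked example with hypothetical numbers: a diamond with TCycles = FCycles
// = 2, no extra predication cost, a 50% probability, a branch predictor and a
// misprediction penalty of 10 gives
//   PredCost   = (2 + 2) * 1024                          = 4096
//   UnpredCost = 0.5*2*1024 + 0.5*2*1024 + 1024 + 1024   = 4096
// so predication is still considered profitable (PredCost <= UnpredCost).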
2035
2036unsigned
2037ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2038 unsigned NumInsts) const {
2039 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2040 // ARM has a condition code field in every predicable instruction, using it
2041 // doesn't change code size.
2042 if (!Subtarget.isThumb2())
2043 return 0;
2044
2045 // If IT blocks are restricted, each IT may cover only a single instruction.
2046 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2047 return divideCeil(NumInsts, MaxInsts) * 2;
2048}
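// Example: predicating 5 instructions in Thumb2 without restricted IT blocks
// costs divideCeil(5, 4) = 2 IT instructions, i.e. 4 extra bytes; with
// restrictIT each IT covers only one instruction, so the same 5 instructions
// cost 10 extra bytes.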
2049
2050unsigned
2051ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2052 // If this branch is likely to be folded into the comparison to form a
2053 // CB(N)Z, then removing it won't reduce code size at all, because that will
2054 // just replace the CB(N)Z with a CMP.
2055 if (MI.getOpcode() == ARM::t2Bcc &&
2056 findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2057 return 0;
2058
2059 unsigned Size = getInstSizeInBytes(MI);
2060
2061 // For Thumb2, all branches are 32-bit instructions during the if conversion
2062 // pass, but may be replaced with 16-bit instructions during size reduction.
2063 // Since the branches considered by if conversion tend to be forward branches
2064 // over small basic blocks, they are very likely to be in range for the
2065 // narrow instructions, so we assume the final code size will be half what it
2066 // currently is.
2067 if (Subtarget.isThumb2())
2068 Size /= 2;
2069
2070 return Size;
2071}
2072
2073bool
2074ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2075 MachineBasicBlock &FMBB) const {
2076 // Reduce false anti-dependencies to let the target's out-of-order execution
2077 // engine do its thing.
2078 return Subtarget.isProfitableToUnpredicate();
2079}
2080
2081/// getInstrPredicate - If instruction is predicated, returns its predicate
2082/// condition, otherwise returns AL. It also returns the condition code
2083/// register by reference.
2084ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2085 Register &PredReg) {
2086 int PIdx = MI.findFirstPredOperandIdx();
2087 if (PIdx == -1) {
2088 PredReg = 0;
2089 return ARMCC::AL;
2090 }
2091
2092 PredReg = MI.getOperand(PIdx+1).getReg();
2093 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2094}
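// Example: for a t2ADDri predicated on EQ, the predicate operands are
// (ARMCC::EQ, CPSR), so this returns ARMCC::EQ and sets PredReg to ARM::CPSR;
// for an instruction without predicate operands it returns ARMCC::AL and sets
// PredReg to 0.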
2095
2096unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2097 if (Opc == ARM::B)
2098 return ARM::Bcc;
2099 if (Opc == ARM::tB)
2100 return ARM::tBcc;
2101 if (Opc == ARM::t2B)
2102 return ARM::t2Bcc;
2103
2104 llvm_unreachable("Unknown unconditional branch opcode!");
2105}
2106
2107MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2108 bool NewMI,
2109 unsigned OpIdx1,
2110 unsigned OpIdx2) const {
2111 switch (MI.getOpcode()) {
2112 case ARM::MOVCCr:
2113 case ARM::t2MOVCCr: {
2114 // MOVCC can be commuted by inverting the condition.
2115 Register PredReg;
2116 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2117 // MOVCC AL can't be inverted. Shouldn't happen.
2118 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2119 return nullptr;
2120 MachineInstr *CommutedMI =
2121 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2122 if (!CommutedMI)
2123 return nullptr;
2124 // After swapping the MOVCC operands, also invert the condition.
2125 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2126 .setImm(ARMCC::getOppositeCondition(CC));
2127 return CommutedMI;
2128 }
2129 }
2130 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2131}
2132
2133/// Identify instructions that can be folded into a MOVCC instruction, and
2134/// return the defining instruction.
2135MachineInstr *
2136ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2137 const TargetInstrInfo *TII) const {
2138 if (!Reg.isVirtual())
2139 return nullptr;
2140 if (!MRI.hasOneNonDBGUse(Reg))
2141 return nullptr;
2142 MachineInstr *MI = MRI.getVRegDef(Reg);
2143 if (!MI)
2144 return nullptr;
2145 // Check if MI can be predicated and folded into the MOVCC.
2146 if (!isPredicable(*MI))
2147 return nullptr;
2148 // Check if MI has any non-dead defs or physreg uses. This also detects
2149 // predicated instructions which will be reading CPSR.
2150 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2151 // Reject frame index operands, PEI can't handle the predicated pseudos.
2152 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2153 return nullptr;
2154 if (!MO.isReg())
2155 continue;
2156 // MI can't have any tied operands, that would conflict with predication.
2157 if (MO.isTied())
2158 return nullptr;
2159 if (MO.getReg().isPhysical())
2160 return nullptr;
2161 if (MO.isDef() && !MO.isDead())
2162 return nullptr;
2163 }
2164 bool DontMoveAcrossStores = true;
2165 if (!MI->isSafeToMove(DontMoveAcrossStores))
2166 return nullptr;
2167 return MI;
2168}
2169
2170bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2171 SmallVectorImpl<MachineOperand> &Cond,
2172 unsigned &TrueOp, unsigned &FalseOp,
2173 bool &Optimizable) const {
2174 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2175 "Unknown select instruction");
2176 // MOVCC operands:
2177 // 0: Def.
2178 // 1: True use.
2179 // 2: False use.
2180 // 3: Condition code.
2181 // 4: CPSR use.
2182 TrueOp = 1;
2183 FalseOp = 2;
2184 Cond.push_back(MI.getOperand(3));
2185 Cond.push_back(MI.getOperand(4));
2186 // We can always fold a def.
2187 Optimizable = true;
2188 return false;
2189}
2190
2191MachineInstr *
2192ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2193 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2194 bool PreferFalse) const {
2195 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2196 "Unknown select instruction");
2197 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2198 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2199 bool Invert = !DefMI;
2200 if (!DefMI)
2201 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2202 if (!DefMI)
2203 return nullptr;
2204
2205 // Find new register class to use.
2206 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2207 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2208 Register DestReg = MI.getOperand(0).getReg();
2209 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2210 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2211 if (!MRI.constrainRegClass(DestReg, FalseClass))
2212 return nullptr;
2213 if (!MRI.constrainRegClass(DestReg, TrueClass))
2214 return nullptr;
2215
2216 // Create a new predicated version of DefMI.
2217 // Rfalse is the first use.
2218 MachineInstrBuilder NewMI =
2219 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2220
2221 // Copy all the DefMI operands, excluding its (null) predicate.
2222 const MCInstrDesc &DefDesc = DefMI->getDesc();
2223 for (unsigned i = 1, e = DefDesc.getNumOperands();
2224 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2225 NewMI.add(DefMI->getOperand(i));
2226
2227 unsigned CondCode = MI.getOperand(3).getImm();
2228 if (Invert)
2229 NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2230 else
2231 NewMI.addImm(CondCode);
2232 NewMI.add(MI.getOperand(4));
2233
2234 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2235 if (NewMI->hasOptionalDef())
2236 NewMI.add(condCodeOp());
2237
2238 // The output register value when the predicate is false is an implicit
2239 // register operand tied to the first def.
2240 // The tie makes the register allocator ensure the FalseReg is allocated the
2241 // same register as operand 0.
2242 FalseReg.setImplicit();
2243 NewMI.add(FalseReg);
2244 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2245
2246 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2247 SeenMIs.insert(NewMI);
2248 SeenMIs.erase(DefMI);
2249
2250 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2251 // DefMI would be invalid when transferred inside the loop. Checking for a
2252 // loop is expensive, but at least remove kill flags if they are in different
2253 // BBs.
2254 if (DefMI->getParent() != MI.getParent())
2255 NewMI->clearKillInfo();
2256
2257 // The caller will erase MI, but not DefMI.
2258 DefMI->eraseFromParent();
2259 return NewMI;
2260}
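// Example of the folding above: when one MOVCC input is produced by a
// single-use, predicable instruction such as a t2ADDri, that add is
// re-emitted as a predicated add that defines the MOVCC destination, the
// other input is appended as an implicit use tied to the def (so both values
// end up in the same physical register), and the caller erases the MOVCC.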
2261
2262/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2263/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2264/// def operand.
2265///
2266/// This will go away once we can teach tblgen how to set the optional CPSR def
2267/// operand itself.
2268struct AddSubFlagsOpcodePair {
2269 uint16_t PseudoOpc;
2270 uint16_t MachineOpc;
2271};
2272
2273static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2274 {ARM::ADDSri, ARM::ADDri},
2275 {ARM::ADDSrr, ARM::ADDrr},
2276 {ARM::ADDSrsi, ARM::ADDrsi},
2277 {ARM::ADDSrsr, ARM::ADDrsr},
2278
2279 {ARM::SUBSri, ARM::SUBri},
2280 {ARM::SUBSrr, ARM::SUBrr},
2281 {ARM::SUBSrsi, ARM::SUBrsi},
2282 {ARM::SUBSrsr, ARM::SUBrsr},
2283
2284 {ARM::RSBSri, ARM::RSBri},
2285 {ARM::RSBSrsi, ARM::RSBrsi},
2286 {ARM::RSBSrsr, ARM::RSBrsr},
2287
2288 {ARM::tADDSi3, ARM::tADDi3},
2289 {ARM::tADDSi8, ARM::tADDi8},
2290 {ARM::tADDSrr, ARM::tADDrr},
2291 {ARM::tADCS, ARM::tADC},
2292
2293 {ARM::tSUBSi3, ARM::tSUBi3},
2294 {ARM::tSUBSi8, ARM::tSUBi8},
2295 {ARM::tSUBSrr, ARM::tSUBrr},
2296 {ARM::tSBCS, ARM::tSBC},
2297 {ARM::tRSBS, ARM::tRSB},
2298 {ARM::tLSLSri, ARM::tLSLri},
2299
2300 {ARM::t2ADDSri, ARM::t2ADDri},
2301 {ARM::t2ADDSrr, ARM::t2ADDrr},
2302 {ARM::t2ADDSrs, ARM::t2ADDrs},
2303
2304 {ARM::t2SUBSri, ARM::t2SUBri},
2305 {ARM::t2SUBSrr, ARM::t2SUBrr},
2306 {ARM::t2SUBSrs, ARM::t2SUBrs},
2307
2308 {ARM::t2RSBSri, ARM::t2RSBri},
2309 {ARM::t2RSBSrs, ARM::t2RSBrs},
2310};
2311
2312unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2313 for (const auto &Entry : AddSubFlagsOpcodeMap)
2314 if (OldOpc == Entry.PseudoOpc)
2315 return Entry.MachineOpc;
2316 return 0;
2317}
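// Example: convertAddSubFlagsOpcode(ARM::t2SUBSri) returns ARM::t2SUBri via
// the table above; an opcode without an entry maps to 0, which callers treat
// as "not an add/sub flag-setting pseudo".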
2318
2319void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2320 MachineBasicBlock::iterator &MBBI,
2321 const DebugLoc &dl, Register DestReg,
2322 Register BaseReg, int NumBytes,
2323 ARMCC::CondCodes Pred, Register PredReg,
2324 const ARMBaseInstrInfo &TII,
2325 unsigned MIFlags) {
2326 if (NumBytes == 0 && DestReg != BaseReg) {
2327 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2328 .addReg(BaseReg, RegState::Kill)
2329 .add(predOps(Pred, PredReg))
2330 .add(condCodeOp())
2331 .setMIFlags(MIFlags);
2332 return;
2333 }
2334
2335 bool isSub = NumBytes < 0;
2336 if (isSub) NumBytes = -NumBytes;
2337
2338 while (NumBytes) {
2339 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2340 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2341 assert(ThisVal && "Didn't extract field correctly");
2342
2343 // We will handle these bits from offset, clear them.
2344 NumBytes &= ~ThisVal;
2345
2346 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2347
2348 // Build the new ADD / SUB.
2349 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2350 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2351 .addReg(BaseReg, RegState::Kill)
2352 .addImm(ThisVal)
2353 .add(predOps(Pred, PredReg))
2354 .add(condCodeOp())
2355 .setMIFlags(MIFlags);
2356 BaseReg = DestReg;
2357 }
2358}
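// Example: NumBytes = 0x101 is not a valid ARM modified immediate (its set
// bits span more than 8 rotated bits), so the loop above emits two ADDri
// instructions, each carrying one encodable 8-bit chunk, rewriting BaseReg to
// DestReg between them; NumBytes = 0 with distinct registers degenerates to a
// single MOVr.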
2359
2360bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2361 MachineFunction &MF, MachineInstr *MI,
2362 unsigned NumBytes) {
2363 // This optimisation potentially adds lots of load and store
2364 // micro-operations, so it is only really beneficial for code size.
2365 if (!Subtarget.hasMinSize())
2366 return false;
2367
2368 // If only one register is pushed/popped, LLVM can use an LDR/STR
2369 // instead. We can't modify those so make sure we're dealing with an
2370 // instruction we understand.
2371 bool IsPop = isPopOpcode(MI->getOpcode());
2372 bool IsPush = isPushOpcode(MI->getOpcode());
2373 if (!IsPush && !IsPop)
2374 return false;
2375
2376 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2377 MI->getOpcode() == ARM::VLDMDIA_UPD;
2378 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2379 MI->getOpcode() == ARM::tPOP ||
2380 MI->getOpcode() == ARM::tPOP_RET;
2381
2382 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2383 MI->getOperand(1).getReg() == ARM::SP)) &&
2384 "trying to fold sp update into non-sp-updating push/pop");
2385
2386 // The VFP push & pop act on D-registers, so we can only correctly fold in
2387 // an adjustment that is a multiple of 8 bytes; similarly, each GPR slot is
2388 // 4 bytes. Don't try if this is violated.
2389 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2390 return false;
2391
2392 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2393 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2394 int RegListIdx = IsT1PushPop ? 2 : 4;
2395
2396 // Calculate the space we'll need in terms of registers.
2397 unsigned RegsNeeded;
2398 const TargetRegisterClass *RegClass;
2399 if (IsVFPPushPop) {
2400 RegsNeeded = NumBytes / 8;
2401 RegClass = &ARM::DPRRegClass;
2402 } else {
2403 RegsNeeded = NumBytes / 4;
2404 RegClass = &ARM::GPRRegClass;
2405 }
2406
2407 // We're going to have to strip all list operands off before
2408 // re-adding them since the order matters, so save the existing ones
2409 // for later.
2410 SmallVector<MachineOperand, 4> RegList;
2411
2412 // We're also going to need the first register transferred by this
2413 // instruction, which won't necessarily be the first register in the list.
2414 unsigned FirstRegEnc = -1;
2415
2417 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2418 MachineOperand &MO = MI->getOperand(i);
2419 RegList.push_back(MO);
2420
2421 if (MO.isReg() && !MO.isImplicit() &&
2422 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2423 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2424 }
2425
2426 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2427
2428 // Now try to find enough space in the reglist to allocate NumBytes.
2429 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2430 --CurRegEnc) {
2431 MCRegister CurReg = RegClass->getRegister(CurRegEnc);
2432 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2433 continue;
2434 if (!IsPop) {
2435 // Pushing any register is completely harmless; mark the register involved
2436 // as undef since we don't care about its value and must not restore it
2437 // during stack unwinding.
2438 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2439 false, false, true));
2440 --RegsNeeded;
2441 continue;
2442 }
2443
2444 // However, we can only pop an extra register if it's not live. For
2445 // registers live within the function we might clobber a return value
2446 // register; the other way a register can be live here is if it's
2447 // callee-saved.
2448 if (isCalleeSavedRegister(CurReg, CSRegs) ||
2449 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2450 MachineBasicBlock::LQR_Dead) {
2451 // VFP pops don't allow holes in the register list, so any skip is fatal
2452 // for our transformation. GPR pops do, so we should just keep looking.
2453 if (IsVFPPushPop)
2454 return false;
2455 else
2456 continue;
2457 }
2458
2459 // Mark the unimportant registers as <def,dead> in the POP.
2460 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2461 true));
2462 --RegsNeeded;
2463 }
2464
2465 if (RegsNeeded > 0)
2466 return false;
2467
2468 // Finally we know we can profitably perform the optimisation so go
2469 // ahead: strip all existing registers off and add them back again
2470 // in the right order.
2471 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2472 MI->removeOperand(i);
2473
2474 // Add the complete list back in.
2475 MachineInstrBuilder MIB(MF, &*MI);
2476 for (const MachineOperand &MO : llvm::reverse(RegList))
2477 MIB.add(MO);
2478
2479 return true;
2480}
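// Example of the folding above (minsize only, register choice is
// illustrative): a prologue "tPUSH {r4, lr}" followed by an 8-byte SP
// decrement can absorb the update by pushing two extra registers as undef,
// becoming "tPUSH {r2, r3, r4, lr}"; a pop can likewise absorb an SP
// increment by popping into registers proven dead at that point.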
2481
2482bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2483 Register FrameReg, int &Offset,
2484 const ARMBaseInstrInfo &TII) {
2485 unsigned Opcode = MI.getOpcode();
2486 const MCInstrDesc &Desc = MI.getDesc();
2487 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2488 bool isSub = false;
2489
2490 // Memory operands in inline assembly always use AddrMode2.
2491 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2492 AddrMode = ARMII::AddrMode2;
2493
2494 if (Opcode == ARM::ADDri) {
2495 Offset += MI.getOperand(FrameRegIdx+1).getImm();
2496 if (Offset == 0) {
2497 // Turn it into a move.
2498 MI.setDesc(TII.get(ARM::MOVr));
2499 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2500 MI.removeOperand(FrameRegIdx+1);
2501 Offset = 0;
2502 return true;
2503 } else if (Offset < 0) {
2504 Offset = -Offset;
2505 isSub = true;
2506 MI.setDesc(TII.get(ARM::SUBri));
2507 }
2508
2509 // Common case: small offset, fits into instruction.
2510 if (ARM_AM::getSOImmVal(Offset) != -1) {
2511 // Replace the FrameIndex with sp / fp
2512 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2513 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2514 Offset = 0;
2515 return true;
2516 }
2517
2518 // Otherwise, pull as much of the immediate into this ADDri/SUBri
2519 // as possible.
2520 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2521 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
2522
2523 // We will handle these bits from offset, clear them.
2524 Offset &= ~ThisImmVal;
2525
2526 // Get the properly encoded SOImmVal field.
2527 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2528 "Bit extraction didn't work?");
2529 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2530 } else {
2531 unsigned ImmIdx = 0;
2532 int InstrOffs = 0;
2533 unsigned NumBits = 0;
2534 unsigned Scale = 1;
2535 switch (AddrMode) {
2536 case ARMII::AddrMode_i12:
2537 ImmIdx = FrameRegIdx + 1;
2538 InstrOffs = MI.getOperand(ImmIdx).getImm();
2539 NumBits = 12;
2540 break;
2541 case ARMII::AddrMode2:
2542 ImmIdx = FrameRegIdx+2;
2543 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2544 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2545 InstrOffs *= -1;
2546 NumBits = 12;
2547 break;
2548 case ARMII::AddrMode3:
2549 ImmIdx = FrameRegIdx+2;
2550 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2551 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2552 InstrOffs *= -1;
2553 NumBits = 8;
2554 break;
2555 case ARMII::AddrMode4:
2556 case ARMII::AddrMode6:
2557 // Can't fold any offset even if it's zero.
2558 return false;
2559 case ARMII::AddrMode5:
2560 ImmIdx = FrameRegIdx+1;
2561 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2562 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2563 InstrOffs *= -1;
2564 NumBits = 8;
2565 Scale = 4;
2566 break;
2567 case ARMII::AddrMode5FP16:
2568 ImmIdx = FrameRegIdx+1;
2569 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2570 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2571 InstrOffs *= -1;
2572 NumBits = 8;
2573 Scale = 2;
2574 break;
2575 case ARMII::AddrModeT2_i7:
2576 case ARMII::AddrModeT2_i7s2:
2577 case ARMII::AddrModeT2_i7s4:
2578 ImmIdx = FrameRegIdx+1;
2579 InstrOffs = MI.getOperand(ImmIdx).getImm();
2580 NumBits = 7;
2581 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2582 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2583 break;
2584 default:
2585 llvm_unreachable("Unsupported addressing mode!");
2586 }
2587
2588 Offset += InstrOffs * Scale;
2589 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2590 if (Offset < 0) {
2591 Offset = -Offset;
2592 isSub = true;
2593 }
2594
2595 // Attempt to fold address comp. if opcode has offset bits
2596 if (NumBits > 0) {
2597 // Common case: small offset, fits into instruction.
2598 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2599 int ImmedOffset = Offset / Scale;
2600 unsigned Mask = (1 << NumBits) - 1;
2601 if ((unsigned)Offset <= Mask * Scale) {
2602 // Replace the FrameIndex with sp
2603 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2604 // FIXME: When addrmode2 goes away, this will simplify (like the
2605 // T2 version), as the LDR.i12 versions don't need the encoding
2606 // tricks for the offset value.
2607 if (isSub) {
2608 if (AddrMode == ARMII::AddrMode_i12)
2609 ImmedOffset = -ImmedOffset;
2610 else
2611 ImmedOffset |= 1 << NumBits;
2612 }
2613 ImmOp.ChangeToImmediate(ImmedOffset);
2614 Offset = 0;
2615 return true;
2616 }
2617
2618 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2619 ImmedOffset = ImmedOffset & Mask;
2620 if (isSub) {
2621 if (AddrMode == ARMII::AddrMode_i12)
2622 ImmedOffset = -ImmedOffset;
2623 else
2624 ImmedOffset |= 1 << NumBits;
2625 }
2626 ImmOp.ChangeToImmediate(ImmedOffset);
2627 Offset &= ~(Mask*Scale);
2628 }
2629 }
2630
2631 Offset = (isSub) ? -Offset : Offset;
2632 return Offset == 0;
2633}
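// Example: an ADDri of a frame index whose combined offset is 0 is rewritten
// into a plain MOVr from the frame register; a combined offset that is a
// valid modified immediate (say 16) is folded directly into the ADDri/SUBri
// and 0 is left in Offset; a larger offset keeps only the encodable chunk and
// returns false so the caller materializes the remainder separately.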
2634
2635/// analyzeCompare - For a comparison instruction, return the source registers
2636/// in SrcReg and SrcReg2 if having two register operands, and the value it
2637/// compares against in CmpValue. Return true if the comparison instruction
2638/// can be analyzed.
2639bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2640 Register &SrcReg2, int64_t &CmpMask,
2641 int64_t &CmpValue) const {
2642 switch (MI.getOpcode()) {
2643 default: break;
2644 case ARM::CMPri:
2645 case ARM::t2CMPri:
2646 case ARM::tCMPi8:
2647 SrcReg = MI.getOperand(0).getReg();
2648 SrcReg2 = 0;
2649 CmpMask = ~0;
2650 CmpValue = MI.getOperand(1).getImm();
2651 return true;
2652 case ARM::CMPrr:
2653 case ARM::t2CMPrr:
2654 case ARM::tCMPr:
2655 SrcReg = MI.getOperand(0).getReg();
2656 SrcReg2 = MI.getOperand(1).getReg();
2657 CmpMask = ~0;
2658 CmpValue = 0;
2659 return true;
2660 case ARM::TSTri:
2661 case ARM::t2TSTri:
2662 SrcReg = MI.getOperand(0).getReg();
2663 SrcReg2 = 0;
2664 CmpMask = MI.getOperand(1).getImm();
2665 CmpValue = 0;
2666 return true;
2667 }
2668
2669 return false;
2670}
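// Example: "CMPri %r0, 42" reports SrcReg = %r0, SrcReg2 = 0, CmpMask = ~0
// and CmpValue = 42; "CMPrr %r0, %r1" reports both source registers with
// CmpValue = 0; TSTri reports its immediate through CmpMask instead. Any
// other opcode returns false.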
2671
2672/// isSuitableForMask - Identify a suitable 'and' instruction that
2673/// operates on the given source register and applies the same mask
2674/// as a 'tst' instruction. Provide a limited look-through for copies.
2675/// When successful, MI will hold the found instruction.
2676static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2677 int CmpMask, bool CommonUse) {
2678 switch (MI->getOpcode()) {
2679 case ARM::ANDri:
2680 case ARM::t2ANDri:
2681 if (CmpMask != MI->getOperand(2).getImm())
2682 return false;
2683 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2684 return true;
2685 break;
2686 }
2687
2688 return false;
2689}
2690
2691/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2692/// the condition code if we modify the instructions such that flags are
2693/// set by ADD(a,b,X).
2694inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2695 switch (CC) {
2696 default: return ARMCC::AL;
2697 case ARMCC::HS: return ARMCC::LO;
2698 case ARMCC::LO: return ARMCC::HS;
2699 case ARMCC::VS: return ARMCC::VS;
2700 case ARMCC::VC: return ARMCC::VC;
2701 }
2702}
2703
2704/// isRedundantFlagInstr - check whether the first instruction, whose only
2705/// purpose is to update flags, can be made redundant.
2706/// CMPrr can be made redundant by SUBrr if the operands are the same.
2707/// CMPri can be made redundant by SUBri if the operands are the same.
2708/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2709/// This function can be extended later on.
2710inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2711 Register SrcReg, Register SrcReg2,
2712 int64_t ImmValue,
2713 const MachineInstr *OI,
2714 bool &IsThumb1) {
2715 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2716 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2717 ((OI->getOperand(1).getReg() == SrcReg &&
2718 OI->getOperand(2).getReg() == SrcReg2) ||
2719 (OI->getOperand(1).getReg() == SrcReg2 &&
2720 OI->getOperand(2).getReg() == SrcReg))) {
2721 IsThumb1 = false;
2722 return true;
2723 }
2724
2725 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2726 ((OI->getOperand(2).getReg() == SrcReg &&
2727 OI->getOperand(3).getReg() == SrcReg2) ||
2728 (OI->getOperand(2).getReg() == SrcReg2 &&
2729 OI->getOperand(3).getReg() == SrcReg))) {
2730 IsThumb1 = true;
2731 return true;
2732 }
2733
2734 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2735 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2736 OI->getOperand(1).getReg() == SrcReg &&
2737 OI->getOperand(2).getImm() == ImmValue) {
2738 IsThumb1 = false;
2739 return true;
2740 }
2741
2742 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2743 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2744 OI->getOperand(2).getReg() == SrcReg &&
2745 OI->getOperand(3).getImm() == ImmValue) {
2746 IsThumb1 = true;
2747 return true;
2748 }
2749
2750 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2751 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2752 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2753 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2754 OI->getOperand(0).getReg() == SrcReg &&
2755 OI->getOperand(1).getReg() == SrcReg2) {
2756 IsThumb1 = false;
2757 return true;
2758 }
2759
2760 if (CmpI->getOpcode() == ARM::tCMPr &&
2761 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2762 OI->getOpcode() == ARM::tADDrr) &&
2763 OI->getOperand(0).getReg() == SrcReg &&
2764 OI->getOperand(2).getReg() == SrcReg2) {
2765 IsThumb1 = true;
2766 return true;
2767 }
2768
2769 return false;
2770}
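// Example: "CMPrr %r0, %r1" is redundant with an earlier "SUBrr %x, %r0, %r1"
// (or with the operands swapped, provided flag users are updated), and
// "CMPri %r0, 7" is redundant with "SUBri %x, %r0, 7"; IsThumb1 tells the
// caller which operand layout the matched instruction uses.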
2771
2772static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2773 switch (MI->getOpcode()) {
2774 default: return false;
2775 case ARM::tLSLri:
2776 case ARM::tLSRri:
2777 case ARM::tLSLrr:
2778 case ARM::tLSRrr:
2779 case ARM::tSUBrr:
2780 case ARM::tADDrr:
2781 case ARM::tADDi3:
2782 case ARM::tADDi8:
2783 case ARM::tSUBi3:
2784 case ARM::tSUBi8:
2785 case ARM::tMUL:
2786 case ARM::tADC:
2787 case ARM::tSBC:
2788 case ARM::tRSB:
2789 case ARM::tAND:
2790 case ARM::tORR:
2791 case ARM::tEOR:
2792 case ARM::tBIC:
2793 case ARM::tMVN:
2794 case ARM::tASRri:
2795 case ARM::tASRrr:
2796 case ARM::tROR:
2797 IsThumb1 = true;
2798 [[fallthrough]];
2799 case ARM::RSBrr:
2800 case ARM::RSBri:
2801 case ARM::RSCrr:
2802 case ARM::RSCri:
2803 case ARM::ADDrr:
2804 case ARM::ADDri:
2805 case ARM::ADCrr:
2806 case ARM::ADCri:
2807 case ARM::SUBrr:
2808 case ARM::SUBri:
2809 case ARM::SBCrr:
2810 case ARM::SBCri:
2811 case ARM::t2RSBri:
2812 case ARM::t2ADDrr:
2813 case ARM::t2ADDri:
2814 case ARM::t2ADCrr:
2815 case ARM::t2ADCri:
2816 case ARM::t2SUBrr:
2817 case ARM::t2SUBri:
2818 case ARM::t2SBCrr:
2819 case ARM::t2SBCri:
2820 case ARM::ANDrr:
2821 case ARM::ANDri:
2822 case ARM::ANDrsr:
2823 case ARM::ANDrsi:
2824 case ARM::t2ANDrr:
2825 case ARM::t2ANDri:
2826 case ARM::t2ANDrs:
2827 case ARM::ORRrr:
2828 case ARM::ORRri:
2829 case ARM::ORRrsr:
2830 case ARM::ORRrsi:
2831 case ARM::t2ORRrr:
2832 case ARM::t2ORRri:
2833 case ARM::t2ORRrs:
2834 case ARM::EORrr:
2835 case ARM::EORri:
2836 case ARM::EORrsr:
2837 case ARM::EORrsi:
2838 case ARM::t2EORrr:
2839 case ARM::t2EORri:
2840 case ARM::t2EORrs:
2841 case ARM::BICri:
2842 case ARM::BICrr:
2843 case ARM::BICrsi:
2844 case ARM::BICrsr:
2845 case ARM::t2BICri:
2846 case ARM::t2BICrr:
2847 case ARM::t2BICrs:
2848 case ARM::t2LSRri:
2849 case ARM::t2LSRrr:
2850 case ARM::t2LSLri:
2851 case ARM::t2LSLrr:
2852 case ARM::MOVsr:
2853 case ARM::MOVsi:
2854 return true;
2855 }
2856}
2857
2858/// optimizeCompareInstr - Convert the instruction supplying the argument to the
2859/// comparison into one that sets the zero bit in the flags register;
2860/// Remove a redundant Compare instruction if an earlier instruction can set the
2861/// flags in the same way as Compare.
2862/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2863/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2864/// condition code of instructions which use the flags.
2865bool ARMBaseInstrInfo::optimizeCompareInstr(
2866 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
2867 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
2868 // Get the unique definition of SrcReg.
2869 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2870 if (!MI) return false;
2871
2872 // Masked compares sometimes use the same register as the corresponding 'and'.
2873 if (CmpMask != ~0) {
2874 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2875 MI = nullptr;
2876 for (MachineRegisterInfo::use_instr_iterator
2877 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2878 UI != UE; ++UI) {
2879 if (UI->getParent() != CmpInstr.getParent())
2880 continue;
2881 MachineInstr *PotentialAND = &*UI;
2882 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2883 isPredicated(*PotentialAND))
2884 continue;
2885 MI = PotentialAND;
2886 break;
2887 }
2888 if (!MI) return false;
2889 }
2890 }
2891
2892 // Get ready to iterate backward from CmpInstr.
2893 MachineBasicBlock::iterator I = CmpInstr, E = MI,
2894 B = CmpInstr.getParent()->begin();
2895
2896 // Early exit if CmpInstr is at the beginning of the BB.
2897 if (I == B) return false;
2898
2899 // There are two possible candidates which can be changed to set CPSR:
2900 // One is MI, the other is a SUB or ADD instruction.
2901 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2902 // ADDr[ri](r1, r2, X).
2903 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2904 MachineInstr *SubAdd = nullptr;
2905 if (SrcReg2 != 0)
2906 // MI is not a candidate for CMPrr.
2907 MI = nullptr;
2908 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2909 // Conservatively refuse to convert an instruction which isn't in the same
2910 // BB as the comparison.
2911 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2912 // Thus we cannot return here.
2913 if (CmpInstr.getOpcode() == ARM::CMPri ||
2914 CmpInstr.getOpcode() == ARM::t2CMPri ||
2915 CmpInstr.getOpcode() == ARM::tCMPi8)
2916 MI = nullptr;
2917 else
2918 return false;
2919 }
2920
2921 bool IsThumb1 = false;
2922 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2923 return false;
2924
2925 // We also want to do this peephole for cases like this: if (a*b == 0),
2926 // and optimise away the CMP instruction from the generated code sequence:
2927 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2928 // resulting from the select instruction, but these MOVS instructions for
2929 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2930 // However, if we only have MOVS instructions in between the CMP and the
2931 // other instruction (the MULS in this example), then the CPSR is dead so we
2932 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2933 // reordering and then continue the analysis hoping we can eliminate the
2934 // CMP. This peephole works on the vregs, so is still in SSA form. As a
2935 // consequence, the movs won't redefine/kill the MUL operands which would
2936 // make this reordering illegal.
2938 if (MI && IsThumb1) {
2939 --I;
2940 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
2941 bool CanReorder = true;
2942 for (; I != E; --I) {
2943 if (I->getOpcode() != ARM::tMOVi8) {
2944 CanReorder = false;
2945 break;
2946 }
2947 }
2948 if (CanReorder) {
2949 MI = MI->removeFromParent();
2950 E = CmpInstr;
2951 CmpInstr.getParent()->insert(E, MI);
2952 }
2953 }
2954 I = CmpInstr;
2955 E = MI;
2956 }
2957
2958 // Check that CPSR isn't set between the comparison instruction and the one we
2959 // want to change. At the same time, search for SubAdd.
2960 bool SubAddIsThumb1 = false;
2961 do {
2962 const MachineInstr &Instr = *--I;
2963
2964 // Check whether CmpInstr can be made redundant by the current instruction.
2965 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
2966 SubAddIsThumb1)) {
2967 SubAdd = &*I;
2968 break;
2969 }
2970
2971 // Allow E (which was initially MI) to be SubAdd but do not search before E.
2972 if (I == E)
2973 break;
2974
2975 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2976 Instr.readsRegister(ARM::CPSR, TRI))
2977 // This instruction modifies or uses CPSR after the one we want to
2978 // change. We can't do this transformation.
2979 return false;
2980
2981 if (I == B) {
2982 // In some cases, we scan the use-list of an instruction for an AND;
2983 // that AND is in the same BB, but may not be scheduled before the
2984 // corresponding TST. In that case, bail out.
2985 //
2986 // FIXME: We could try to reschedule the AND.
2987 return false;
2988 }
2989 } while (true);
2990
2991 // Return false if no candidates exist.
2992 if (!MI && !SubAdd)
2993 return false;
2994
2995 // If we found a SubAdd, use it as it will be closer to the CMP
2996 if (SubAdd) {
2997 MI = SubAdd;
2998 IsThumb1 = SubAddIsThumb1;
2999 }
3000
3001 // We can't use a predicated instruction - it doesn't always write the flags.
3002 if (isPredicated(*MI))
3003 return false;
3004
3005 // Scan forward for the use of CPSR
3006 // When checking against MI: if it's a conditional code that requires
3007 // checking of the V bit or C bit, then this is not safe to do.
3008 // It is safe to remove CmpInstr if CPSR is redefined or killed.
3009 // If we are done with the basic block, we need to check whether CPSR is
3010 // live-out.
3011 SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3012 OperandsToUpdate;
3013 bool isSafe = false;
3014 I = CmpInstr;
3015 E = CmpInstr.getParent()->end();
3016 while (!isSafe && ++I != E) {
3017 const MachineInstr &Instr = *I;
3018 for (unsigned IO = 0, EO = Instr.getNumOperands();
3019 !isSafe && IO != EO; ++IO) {
3020 const MachineOperand &MO = Instr.getOperand(IO);
3021 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3022 isSafe = true;
3023 break;
3024 }
3025 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3026 continue;
3027 if (MO.isDef()) {
3028 isSafe = true;
3029 break;
3030 }
3031 // Condition code is after the operand before CPSR except for VSELs.
3032 ARMCC::CondCodes CC;
3033 bool IsInstrVSel = true;
3034 switch (Instr.getOpcode()) {
3035 default:
3036 IsInstrVSel = false;
3037 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3038 break;
3039 case ARM::VSELEQD:
3040 case ARM::VSELEQS:
3041 case ARM::VSELEQH:
3042 CC = ARMCC::EQ;
3043 break;
3044 case ARM::VSELGTD:
3045 case ARM::VSELGTS:
3046 case ARM::VSELGTH:
3047 CC = ARMCC::GT;
3048 break;
3049 case ARM::VSELGED:
3050 case ARM::VSELGES:
3051 case ARM::VSELGEH:
3052 CC = ARMCC::GE;
3053 break;
3054 case ARM::VSELVSD:
3055 case ARM::VSELVSS:
3056 case ARM::VSELVSH:
3057 CC = ARMCC::VS;
3058 break;
3059 }
3060
3061 if (SubAdd) {
3062 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3063 // on CMP needs to be updated to be based on SUB.
3064 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3065 // needs to be modified.
3066 // Push the condition code operands to OperandsToUpdate.
3067 // If it is safe to remove CmpInstr, the condition code of these
3068 // operands will be modified.
3069 unsigned Opc = SubAdd->getOpcode();
3070 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3071 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3072 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3073 Opc == ARM::tSUBi8;
3074 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3075 if (!IsSub ||
3076 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3077 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3078 // VSel doesn't support condition code update.
3079 if (IsInstrVSel)
3080 return false;
3081 // Ensure we can swap the condition.
3082 ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3083 if (NewCC == ARMCC::AL)
3084 return false;
3085 OperandsToUpdate.push_back(
3086 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3087 }
3088 } else {
3089 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3090 switch (CC) {
3091 case ARMCC::EQ: // Z
3092 case ARMCC::NE: // Z
3093 case ARMCC::MI: // N
3094 case ARMCC::PL: // N
3095 case ARMCC::AL: // none
3096 // CPSR can be used multiple times, we should continue.
3097 break;
3098 case ARMCC::HS: // C
3099 case ARMCC::LO: // C
3100 case ARMCC::VS: // V
3101 case ARMCC::VC: // V
3102 case ARMCC::HI: // C Z
3103 case ARMCC::LS: // C Z
3104 case ARMCC::GE: // N V
3105 case ARMCC::LT: // N V
3106 case ARMCC::GT: // Z N V
3107 case ARMCC::LE: // Z N V
3108 // The instruction uses the V bit or C bit which is not safe.
3109 return false;
3110 }
3111 }
3112 }
3113 }
3114
3115 // If CPSR is neither killed nor re-defined, we should check whether it is
3116 // live-out. If it is live-out, do not optimize.
3117 if (!isSafe) {
3118 MachineBasicBlock *MBB = CmpInstr.getParent();
3119 for (MachineBasicBlock *Succ : MBB->successors())
3120 if (Succ->isLiveIn(ARM::CPSR))
3121 return false;
3122 }
3123
3124 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3125 // set CPSR so this is represented as an explicit output)
3126 if (!IsThumb1) {
3127 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3128 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3129 MI->getOperand(CPSRRegNum).setIsDef(true);
3130 }
3131 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3132 CmpInstr.eraseFromParent();
3133
3134 // Modify the condition code of operands in OperandsToUpdate.
3135 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3136 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3137 for (auto &[MO, Cond] : OperandsToUpdate)
3138 MO->setImm(Cond);
3139
3140 MI->clearRegisterDeads(ARM::CPSR);
3141
3142 return true;
3143}
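// Example of the rewrite above: for
//   %2 = SUBrr %0, %1, 14, $noreg, $noreg
//   CMPrr %0, %1, implicit-def $cpsr
// the SUB's optional CPSR operand is redirected to define $cpsr and the CMP
// is erased; if the compare operands were swapped relative to the SUB, the
// conditions of the flag users are replaced with their swapped forms first.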
3144
3145bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3146 // Do not sink MI if it might be used to optimize a redundant compare.
3147 // We heuristically only look at the instruction immediately following MI to
3148 // avoid potentially searching the entire basic block.
3149 if (isPredicated(MI))
3150 return true;
3151 MachineBasicBlock::const_iterator Next = &MI;
3152 ++Next;
3153 Register SrcReg, SrcReg2;
3154 int64_t CmpMask, CmpValue;
3155 bool IsThumb1;
3156 if (Next != MI.getParent()->end() &&
3157 analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3158 isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3159 return false;
3160 return true;
3161}
3162
3163bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3164 Register Reg,
3165 MachineRegisterInfo *MRI) const {
3166 // Fold large immediates into add, sub, or, xor.
3167 unsigned DefOpc = DefMI.getOpcode();
3168 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
3169 DefOpc != ARM::tMOVi32imm)
3170 return false;
3171 if (!DefMI.getOperand(1).isImm())
3172 // Could be t2MOVi32imm @xx
3173 return false;
3174
3175 if (!MRI->hasOneNonDBGUse(Reg))
3176 return false;
3177
3178 const MCInstrDesc &DefMCID = DefMI.getDesc();
3179 if (DefMCID.hasOptionalDef()) {
3180 unsigned NumOps = DefMCID.getNumOperands();
3181 const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3182 if (MO.getReg() == ARM::CPSR && !MO.isDead())
3183 // If DefMI defines CPSR and it is not dead, it's obviously not safe
3184 // to delete DefMI.
3185 return false;
3186 }
3187
3188 const MCInstrDesc &UseMCID = UseMI.getDesc();
3189 if (UseMCID.hasOptionalDef()) {
3190 unsigned NumOps = UseMCID.getNumOperands();
3191 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3192 // If the instruction sets the flag, do not attempt this optimization
3193 // since it may change the semantics of the code.
3194 return false;
3195 }
3196
3197 unsigned UseOpc = UseMI.getOpcode();
3198 unsigned NewUseOpc = 0;
3199 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3200 uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3201 bool Commute = false;
3202 switch (UseOpc) {
3203 default: return false;
3204 case ARM::SUBrr:
3205 case ARM::ADDrr:
3206 case ARM::ORRrr:
3207 case ARM::EORrr:
3208 case ARM::t2SUBrr:
3209 case ARM::t2ADDrr:
3210 case ARM::t2ORRrr:
3211 case ARM::t2EORrr: {
3212 Commute = UseMI.getOperand(2).getReg() != Reg;
3213 switch (UseOpc) {
3214 default: break;
3215 case ARM::ADDrr:
3216 case ARM::SUBrr:
3217 if (UseOpc == ARM::SUBrr && Commute)
3218 return false;
3219
3220 // ADD/SUB are special because they're essentially the same operation, so
3221 // we can handle a larger range of immediates.
3222 if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3223 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3224 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3225 ImmVal = -ImmVal;
3226 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3227 } else
3228 return false;
3229 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3230 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3231 break;
3232 case ARM::ORRrr:
3233 case ARM::EORrr:
3234 if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3235 return false;
3236 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3237 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3238 switch (UseOpc) {
3239 default: break;
3240 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3241 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3242 }
3243 break;
3244 case ARM::t2ADDrr:
3245 case ARM::t2SUBrr: {
3246 if (UseOpc == ARM::t2SUBrr && Commute)
3247 return false;
3248
3249 // ADD/SUB are special because they're essentially the same operation, so
3250 // we can handle a larger range of immediates.
3251 const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3252 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3253 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3254 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3255 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3256 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3257 ImmVal = -ImmVal;
3258 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3259 } else
3260 return false;
3261 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3262 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3263 break;
3264 }
3265 case ARM::t2ORRrr:
3266 case ARM::t2EORrr:
3267 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3268 return false;
3269 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3270 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3271 switch (UseOpc) {
3272 default: break;
3273 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3274 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3275 }
3276 break;
3277 }
3278 }
3279 }
3280
3281 unsigned OpIdx = Commute ? 2 : 1;
3282 Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3283 bool isKill = UseMI.getOperand(OpIdx).isKill();
3284 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3285 Register NewReg = MRI->createVirtualRegister(TRC);
3286 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3287 NewReg)
3288 .addReg(Reg1, getKillRegState(isKill))
3289 .addImm(SOImmValV1)
3290 .add(predOps(ARMCC::AL))
3291 .add(condCodeOp());
3292 UseMI.setDesc(get(NewUseOpc));
3293 UseMI.getOperand(1).setReg(NewReg);
3294 UseMI.getOperand(1).setIsKill();
3295 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3296 DefMI.eraseFromParent();
3297 // FIXME: t2ADDrr should be split, as different rules apply when writing to SP,
3298 // just as t2ADDri was split into [t2ADDri, t2ADDspImm].
3299 // Then the below code will not be needed, as the input/output register
3300 // classes will be rgpr or gprSP.
3301 // For now, we fix the UseMI operand explicitly here:
3302 switch(NewUseOpc){
3303 case ARM::t2ADDspImm:
3304 case ARM::t2SUBspImm:
3305 case ARM::t2ADDri:
3306 case ARM::t2SUBri:
3307 MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3308 }
3309 return true;
3310}
3311
3312static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3313 const MachineInstr &MI) {
3314 switch (MI.getOpcode()) {
3315 default: {
3316 const MCInstrDesc &Desc = MI.getDesc();
3317 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3318 assert(UOps >= 0 && "bad # UOps");
3319 return UOps;
3320 }
3321
3322 case ARM::LDRrs:
3323 case ARM::LDRBrs:
3324 case ARM::STRrs:
3325 case ARM::STRBrs: {
3326 unsigned ShOpVal = MI.getOperand(3).getImm();
3327 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3328 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3329 if (!isSub &&
3330 (ShImm == 0 ||
3331 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3332 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3333 return 1;
3334 return 2;
3335 }
3336
3337 case ARM::LDRH:
3338 case ARM::STRH: {
3339 if (!MI.getOperand(2).getReg())
3340 return 1;
3341
3342 unsigned ShOpVal = MI.getOperand(3).getImm();
3343 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3344 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3345 if (!isSub &&
3346 (ShImm == 0 ||
3347 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3348 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3349 return 1;
3350 return 2;
3351 }
3352
3353 case ARM::LDRSB:
3354 case ARM::LDRSH:
3355 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3356
3357 case ARM::LDRSB_POST:
3358 case ARM::LDRSH_POST: {
3359 Register Rt = MI.getOperand(0).getReg();
3360 Register Rm = MI.getOperand(3).getReg();
3361 return (Rt == Rm) ? 4 : 3;
3362 }
3363
3364 case ARM::LDR_PRE_REG:
3365 case ARM::LDRB_PRE_REG: {
3366 Register Rt = MI.getOperand(0).getReg();
3367 Register Rm = MI.getOperand(3).getReg();
3368 if (Rt == Rm)
3369 return 3;
3370 unsigned ShOpVal = MI.getOperand(4).getImm();
3371 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3372 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3373 if (!isSub &&
3374 (ShImm == 0 ||
3375 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3376 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3377 return 2;
3378 return 3;
3379 }
3380
3381 case ARM::STR_PRE_REG:
3382 case ARM::STRB_PRE_REG: {
3383 unsigned ShOpVal = MI.getOperand(4).getImm();
3384 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3385 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3386 if (!isSub &&
3387 (ShImm == 0 ||
3388 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3389 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3390 return 2;
3391 return 3;
3392 }
3393
3394 case ARM::LDRH_PRE:
3395 case ARM::STRH_PRE: {
3396 Register Rt = MI.getOperand(0).getReg();
3397 Register Rm = MI.getOperand(3).getReg();
3398 if (!Rm)
3399 return 2;
3400 if (Rt == Rm)
3401 return 3;
3402 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3403 }
3404
3405 case ARM::LDR_POST_REG:
3406 case ARM::LDRB_POST_REG:
3407 case ARM::LDRH_POST: {
3408 Register Rt = MI.getOperand(0).getReg();
3409 Register Rm = MI.getOperand(3).getReg();
3410 return (Rt == Rm) ? 3 : 2;
3411 }
3412
3413 case ARM::LDR_PRE_IMM:
3414 case ARM::LDRB_PRE_IMM:
3415 case ARM::LDR_POST_IMM:
3416 case ARM::LDRB_POST_IMM:
3417 case ARM::STRB_POST_IMM:
3418 case ARM::STRB_POST_REG:
3419 case ARM::STRB_PRE_IMM:
3420 case ARM::STRH_POST:
3421 case ARM::STR_POST_IMM:
3422 case ARM::STR_POST_REG:
3423 case ARM::STR_PRE_IMM:
3424 return 2;
3425
3426 case ARM::LDRSB_PRE:
3427 case ARM::LDRSH_PRE: {
3428 Register Rm = MI.getOperand(3).getReg();
3429 if (Rm == 0)
3430 return 3;
3431 Register Rt = MI.getOperand(0).getReg();
3432 if (Rt == Rm)
3433 return 4;
3434 unsigned ShOpVal = MI.getOperand(4).getImm();
3435 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3436 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3437 if (!isSub &&
3438 (ShImm == 0 ||
3439 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3440 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3441 return 3;
3442 return 4;
3443 }
3444
3445 case ARM::LDRD: {
3446 Register Rt = MI.getOperand(0).getReg();
3447 Register Rn = MI.getOperand(2).getReg();
3448 Register Rm = MI.getOperand(3).getReg();
3449 if (Rm)
3450 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3451 : 3;
3452 return (Rt == Rn) ? 3 : 2;
3453 }
3454
3455 case ARM::STRD: {
3456 Register Rm = MI.getOperand(3).getReg();
3457 if (Rm)
3458 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3459 : 3;
3460 return 2;
3461 }
3462
3463 case ARM::LDRD_POST:
3464 case ARM::t2LDRD_POST:
3465 return 3;
3466
3467 case ARM::STRD_POST:
3468 case ARM::t2STRD_POST:
3469 return 4;
3470
3471 case ARM::LDRD_PRE: {
3472 Register Rt = MI.getOperand(0).getReg();
3473 Register Rn = MI.getOperand(3).getReg();
3474 Register Rm = MI.getOperand(4).getReg();
3475 if (Rm)
3476 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3477 : 4;
3478 return (Rt == Rn) ? 4 : 3;
3479 }
3480
3481 case ARM::t2LDRD_PRE: {
3482 Register Rt = MI.getOperand(0).getReg();
3483 Register Rn = MI.getOperand(3).getReg();
3484 return (Rt == Rn) ? 4 : 3;
3485 }
3486
3487 case ARM::STRD_PRE: {
3488 Register Rm = MI.getOperand(4).getReg();
3489 if (Rm)
3490 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3491 : 4;
3492 return 3;
3493 }
3494
3495 case ARM::t2STRD_PRE:
3496 return 3;
3497
3498 case ARM::t2LDR_POST:
3499 case ARM::t2LDRB_POST:
3500 case ARM::t2LDRB_PRE:
3501 case ARM::t2LDRSBi12:
3502 case ARM::t2LDRSBi8:
3503 case ARM::t2LDRSBpci:
3504 case ARM::t2LDRSBs:
3505 case ARM::t2LDRH_POST:
3506 case ARM::t2LDRH_PRE:
3507 case ARM::t2LDRSBT:
3508 case ARM::t2LDRSB_POST:
3509 case ARM::t2LDRSB_PRE:
3510 case ARM::t2LDRSH_POST:
3511 case ARM::t2LDRSH_PRE:
3512 case ARM::t2LDRSHi12:
3513 case ARM::t2LDRSHi8:
3514 case ARM::t2LDRSHpci:
3515 case ARM::t2LDRSHs:
3516 return 2;
3517
3518 case ARM::t2LDRDi8: {
3519 Register Rt = MI.getOperand(0).getReg();
3520 Register Rn = MI.getOperand(2).getReg();
3521 return (Rt == Rn) ? 3 : 2;
3522 }
3523
3524 case ARM::t2STRB_POST:
3525 case ARM::t2STRB_PRE:
3526 case ARM::t2STRBs:
3527 case ARM::t2STRDi8:
3528 case ARM::t2STRH_POST:
3529 case ARM::t2STRH_PRE:
3530 case ARM::t2STRHs:
3531 case ARM::t2STR_POST:
3532 case ARM::t2STR_PRE:
3533 case ARM::t2STRs:
3534 return 2;
3535 }
3536}
3537
3538// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3539// can't be easily determined return 0 (missing MachineMemOperand).
3540//
3541// FIXME: The current MachineInstr design does not support relying on machine
3542// mem operands to determine the width of a memory access. Instead, we expect
3543// the target to provide this information based on the instruction opcode and
3544// operands. However, using MachineMemOperand is the best solution now for
3545// two reasons:
3546//
3547// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3548// operands. This is much more dangerous than using the MachineMemOperand
3549// sizes because CodeGen passes can insert/remove optional machine operands. In
3550// fact, it's totally incorrect for preRA passes and appears to be wrong for
3551// postRA passes as well.
3552//
3553// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3554// machine model that calls this should handle the unknown (zero size) case.
3555//
3556// Long term, we should require a target hook that verifies MachineMemOperand
3557// sizes during MC lowering. That target hook should be local to MC lowering
3558// because we can't ensure that it is aware of other MI forms. Doing this will
3559// ensure that MachineMemOperands are correctly propagated through all passes.
3560unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3561  unsigned Size = 0;
3562 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3563 E = MI.memoperands_end();
3564 I != E; ++I) {
3565 Size += (*I)->getSize().getValue();
3566 }
3567 // FIXME: The scheduler currently can't handle values larger than 16. But
3568 // the values can actually go up to 32 for floating-point load/store
3569 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3570 // operations isn't right; we could end up with "extra" memory operands for
3571 // various reasons, like tail merge merging two memory operations.
3572 return std::min(Size / 4, 16U);
3573}
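// A rough sketch of how a scheduling model might consume the result above,
// using a hypothetical helper name: a return value of 0 means the memoperands
// were missing, so the caller must treat the width as unknown rather than as
// "no memory traffic".
static unsigned conservativeLdmWords(unsigned ReportedWords) {
  return ReportedWords ? ReportedWords : 16; // fall back to the 16-word cap
}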
3574
3575static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3576                                                    unsigned NumRegs) {
3577 unsigned UOps = 1 + NumRegs; // 1 for address computation.
3578 switch (Opc) {
3579 default:
3580 break;
3581 case ARM::VLDMDIA_UPD:
3582 case ARM::VLDMDDB_UPD:
3583 case ARM::VLDMSIA_UPD:
3584 case ARM::VLDMSDB_UPD:
3585 case ARM::VSTMDIA_UPD:
3586 case ARM::VSTMDDB_UPD:
3587 case ARM::VSTMSIA_UPD:
3588 case ARM::VSTMSDB_UPD:
3589 case ARM::LDMIA_UPD:
3590 case ARM::LDMDA_UPD:
3591 case ARM::LDMDB_UPD:
3592 case ARM::LDMIB_UPD:
3593 case ARM::STMIA_UPD:
3594 case ARM::STMDA_UPD:
3595 case ARM::STMDB_UPD:
3596 case ARM::STMIB_UPD:
3597 case ARM::tLDMIA_UPD:
3598 case ARM::tSTMIA_UPD:
3599 case ARM::t2LDMIA_UPD:
3600 case ARM::t2LDMDB_UPD:
3601 case ARM::t2STMIA_UPD:
3602 case ARM::t2STMDB_UPD:
3603 ++UOps; // One for base register writeback.
3604 break;
3605 case ARM::LDMIA_RET:
3606 case ARM::tPOP_RET:
3607 case ARM::t2LDMIA_RET:
3608 UOps += 2; // One for base reg wb, one for write to pc.
3609 break;
3610 }
3611 return UOps;
3612}
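// Worked example for the micro-op count above, using an assumed register
// count: a 4-register LDMIA_RET costs 1 (address computation) + 4 (registers)
// + 2 (base writeback and write to pc) micro-ops.
static_assert(1 + 4 + 2 == 7, "4-register LDMIA_RET -> 7 micro-ops");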
3613
3614unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3615                                          const MachineInstr &MI) const {
3616 if (!ItinData || ItinData->isEmpty())
3617 return 1;
3618
3619 const MCInstrDesc &Desc = MI.getDesc();
3620 unsigned Class = Desc.getSchedClass();
3621 int ItinUOps = ItinData->getNumMicroOps(Class);
3622 if (ItinUOps >= 0) {
3623 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3624 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3625
3626 return ItinUOps;
3627 }
3628
3629 unsigned Opc = MI.getOpcode();
3630 switch (Opc) {
3631 default:
3632 llvm_unreachable("Unexpected multi-uops instruction!");
3633 case ARM::VLDMQIA:
3634 case ARM::VSTMQIA:
3635 return 2;
3636
3637  // The number of uOps for a load / store multiple is determined by the
3638  // number of registers.
3639 //
3640 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3641 // same cycle. The scheduling for the first load / store must be done
3642 // separately by assuming the address is not 64-bit aligned.
3643 //
3644 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3645 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3646 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3647 case ARM::VLDMDIA:
3648 case ARM::VLDMDIA_UPD:
3649 case ARM::VLDMDDB_UPD:
3650 case ARM::VLDMSIA:
3651 case ARM::VLDMSIA_UPD:
3652 case ARM::VLDMSDB_UPD:
3653 case ARM::VSTMDIA:
3654 case ARM::VSTMDIA_UPD:
3655 case ARM::VSTMDDB_UPD:
3656 case ARM::VSTMSIA:
3657 case ARM::VSTMSIA_UPD:
3658 case ARM::VSTMSDB_UPD: {
3659 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3660 return (NumRegs / 2) + (NumRegs % 2) + 1;
3661 }
3662
3663 case ARM::LDMIA_RET:
3664 case ARM::LDMIA:
3665 case ARM::LDMDA:
3666 case ARM::LDMDB:
3667 case ARM::LDMIB:
3668 case ARM::LDMIA_UPD:
3669 case ARM::LDMDA_UPD:
3670 case ARM::LDMDB_UPD:
3671 case ARM::LDMIB_UPD:
3672 case ARM::STMIA:
3673 case ARM::STMDA:
3674 case ARM::STMDB:
3675 case ARM::STMIB:
3676 case ARM::STMIA_UPD:
3677 case ARM::STMDA_UPD:
3678 case ARM::STMDB_UPD:
3679 case ARM::STMIB_UPD:
3680 case ARM::tLDMIA:
3681 case ARM::tLDMIA_UPD:
3682 case ARM::tSTMIA_UPD:
3683 case ARM::tPOP_RET:
3684 case ARM::tPOP:
3685 case ARM::tPUSH:
3686 case ARM::t2LDMIA_RET:
3687 case ARM::t2LDMIA:
3688 case ARM::t2LDMDB:
3689 case ARM::t2LDMIA_UPD:
3690 case ARM::t2LDMDB_UPD:
3691 case ARM::t2STMIA:
3692 case ARM::t2STMDB:
3693 case ARM::t2STMIA_UPD:
3694 case ARM::t2STMDB_UPD: {
3695 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3696 switch (Subtarget.getLdStMultipleTiming()) {
3697    case ARMSubtarget::SingleIssuePlusExtras:
3698      return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3699    case ARMSubtarget::SingleIssue:
3700      // Assume the worst.
3701 return NumRegs;
3702    case ARMSubtarget::DoubleIssue: {
3703      if (NumRegs < 4)
3704 return 2;
3705 // 4 registers would be issued: 2, 2.
3706 // 5 registers would be issued: 2, 2, 1.
3707 unsigned UOps = (NumRegs / 2);
3708 if (NumRegs % 2)
3709 ++UOps;
3710 return UOps;
3711 }
3712    case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3713      unsigned UOps = (NumRegs / 2);
3714      // If there is an odd number of registers or the address is not 64-bit
3715      // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
3716 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3717 (*MI.memoperands_begin())->getAlign() < Align(8))
3718 ++UOps;
3719 return UOps;
3720 }
3721 }
3722 }
3723 }
3724 llvm_unreachable("Didn't find the number of microops");
3725}
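// Worked example for the Cortex-A9-style VLDM/VSTM formula quoted above,
// using an assumed register count: 5 D-registers issue as
// (5 / 2) + (5 % 2) + 1 == 4 micro-ops.
static_assert((5 / 2) + (5 % 2) + 1 == 4, "5-register VLDM -> 4 micro-ops");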
3726
3727std::optional<unsigned>
3728ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3729 const MCInstrDesc &DefMCID, unsigned DefClass,
3730 unsigned DefIdx, unsigned DefAlign) const {
3731 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3732 if (RegNo <= 0)
3733 // Def is the address writeback.
3734 return ItinData->getOperandCycle(DefClass, DefIdx);
3735
3736 unsigned DefCycle;
3737 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3738 // (regno / 2) + (regno % 2) + 1
3739 DefCycle = RegNo / 2 + 1;
3740 if (RegNo % 2)
3741 ++DefCycle;
3742 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3743 DefCycle = RegNo;
3744 bool isSLoad = false;
3745
3746 switch (DefMCID.getOpcode()) {
3747 default: break;
3748 case ARM::VLDMSIA:
3749 case ARM::VLDMSIA_UPD:
3750 case ARM::VLDMSDB_UPD:
3751 isSLoad = true;
3752 break;
3753 }
3754
3755    // If there is an odd number of 'S' registers or the address is not
3756    // 64-bit aligned, then it takes an extra cycle.
3757 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3758 ++DefCycle;
3759 } else {
3760 // Assume the worst.
3761 DefCycle = RegNo + 2;
3762 }
3763
3764 return DefCycle;
3765}
3766
3767std::optional<unsigned>
3768ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3769 const MCInstrDesc &DefMCID, unsigned DefClass,
3770 unsigned DefIdx, unsigned DefAlign) const {
3771 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3772 if (RegNo <= 0)
3773 // Def is the address writeback.
3774 return ItinData->getOperandCycle(DefClass, DefIdx);
3775
3776 unsigned DefCycle;
3777 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3778 // 4 registers would be issued: 1, 2, 1.
3779 // 5 registers would be issued: 1, 2, 2.
3780 DefCycle = RegNo / 2;
3781 if (DefCycle < 1)
3782 DefCycle = 1;
3783 // Result latency is issue cycle + 2: E2.
3784 DefCycle += 2;
3785 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3786 DefCycle = (RegNo / 2);
3787    // If there is an odd number of registers or the address is not 64-bit
3788    // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
3789 if ((RegNo % 2) || DefAlign < 8)
3790 ++DefCycle;
3791 // Result latency is AGU cycles + 2.
3792 DefCycle += 2;
3793 } else {
3794 // Assume the worst.
3795 DefCycle = RegNo + 2;
3796 }
3797
3798 return DefCycle;
3799}
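// Worked example for the Cortex-A8-like LDM path above, using assumed
// register positions: the first loaded register (RegNo == 1) becomes
// available at max(1 / 2, 1) + 2 == cycle 3, while the fourth (RegNo == 4)
// becomes available at 4 / 2 + 2 == cycle 4.
static_assert(4 / 2 + 2 == 4, "fourth LDM result available in cycle 4");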
3800
3801std::optional<unsigned>
3802ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3803 const MCInstrDesc &UseMCID, unsigned UseClass,
3804 unsigned UseIdx, unsigned UseAlign) const {
3805 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3806 if (RegNo <= 0)
3807 return ItinData->getOperandCycle(UseClass, UseIdx);
3808
3809 unsigned UseCycle;
3810 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3811 // (regno / 2) + (regno % 2) + 1
3812 UseCycle = RegNo / 2 + 1;
3813 if (RegNo % 2)
3814 ++UseCycle;
3815 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3816 UseCycle = RegNo;
3817 bool isSStore = false;
3818
3819 switch (UseMCID.getOpcode()) {
3820 default: break;
3821 case ARM::VSTMSIA:
3822 case ARM::VSTMSIA_UPD:
3823 case ARM::VSTMSDB_UPD:
3824 isSStore = true;
3825 break;
3826 }
3827
3828    // If there is an odd number of 'S' registers or the address is not
3829    // 64-bit aligned, then it takes an extra cycle.
3830 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3831 ++UseCycle;
3832 } else {
3833 // Assume the worst.
3834 UseCycle = RegNo + 2;
3835 }
3836
3837 return UseCycle;
3838}
3839
3840std::optional<unsigned>
3841ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3842 const MCInstrDesc &UseMCID, unsigned UseClass,
3843 unsigned UseIdx, unsigned UseAlign) const {
3844 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3845 if (RegNo <= 0)
3846 return ItinData->getOperandCycle(UseClass, UseIdx);
3847
3848 unsigned UseCycle;
3849 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3850 UseCycle = RegNo / 2;
3851 if (UseCycle < 2)
3852 UseCycle = 2;
3853 // Read in E3.
3854 UseCycle += 2;
3855 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3856 UseCycle = (RegNo / 2);
3857    // If there is an odd number of registers or the address is not 64-bit
3858    // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
3859 if ((RegNo % 2) || UseAlign < 8)
3860 ++UseCycle;
3861 } else {
3862 // Assume the worst.
3863 UseCycle = 1;
3864 }
3865 return UseCycle;
3866}
3867
3868std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
3869 const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
3870 unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
3871 unsigned UseIdx, unsigned UseAlign) const {
3872 unsigned DefClass = DefMCID.getSchedClass();
3873 unsigned UseClass = UseMCID.getSchedClass();
3874
3875 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3876 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3877
3878 // This may be a def / use of a variable_ops instruction, the operand
3879 // latency might be determinable dynamically. Let the target try to
3880 // figure it out.
3881 std::optional<unsigned> DefCycle;
3882 bool LdmBypass = false;
3883 switch (DefMCID.getOpcode()) {
3884 default:
3885 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3886 break;
3887
3888 case ARM::VLDMDIA:
3889 case ARM::VLDMDIA_UPD:
3890 case ARM::VLDMDDB_UPD:
3891 case ARM::VLDMSIA:
3892 case ARM::VLDMSIA_UPD:
3893 case ARM::VLDMSDB_UPD:
3894 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3895 break;
3896
3897 case ARM::LDMIA_RET:
3898 case ARM::LDMIA:
3899 case ARM::LDMDA:
3900 case ARM::LDMDB:
3901 case ARM::LDMIB:
3902 case ARM::LDMIA_UPD:
3903 case ARM::LDMDA_UPD:
3904 case ARM::LDMDB_UPD:
3905 case ARM::LDMIB_UPD:
3906 case ARM::tLDMIA:
3907 case ARM::tLDMIA_UPD:
3908 case ARM::tPUSH:
3909 case ARM::t2LDMIA_RET:
3910 case ARM::t2LDMIA:
3911 case ARM::t2LDMDB:
3912 case ARM::t2LDMIA_UPD:
3913 case ARM::t2LDMDB_UPD:
3914 LdmBypass = true;
3915 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3916 break;
3917 }
3918
3919 if (!DefCycle)
3920 // We can't seem to determine the result latency of the def, assume it's 2.
3921 DefCycle = 2;
3922
3923 std::optional<unsigned> UseCycle;
3924 switch (UseMCID.getOpcode()) {
3925 default:
3926 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3927 break;
3928
3929 case ARM::VSTMDIA:
3930 case ARM::VSTMDIA_UPD:
3931 case ARM::VSTMDDB_UPD:
3932 case ARM::VSTMSIA:
3933 case ARM::VSTMSIA_UPD:
3934 case ARM::VSTMSDB_UPD:
3935 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3936 break;
3937
3938 case ARM::STMIA:
3939 case ARM::STMDA:
3940 case ARM::STMDB:
3941 case ARM::STMIB:
3942 case ARM::STMIA_UPD:
3943 case ARM::STMDA_UPD:
3944 case ARM::STMDB_UPD:
3945 case ARM::STMIB_UPD:
3946 case ARM::tSTMIA_UPD:
3947 case ARM::tPOP_RET:
3948 case ARM::tPOP:
3949 case ARM::t2STMIA:
3950 case ARM::t2STMDB:
3951 case ARM::t2STMIA_UPD:
3952 case ARM::t2STMDB_UPD:
3953 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3954 break;
3955 }
3956
3957 if (!UseCycle)
3958 // Assume it's read in the first stage.
3959 UseCycle = 1;
3960
3961 if (UseCycle > *DefCycle + 1)
3962 return std::nullopt;
3963
3964 UseCycle = *DefCycle - *UseCycle + 1;
3965 if (UseCycle > 0u) {
3966 if (LdmBypass) {
3967 // It's a variable_ops instruction so we can't use DefIdx here. Just use
3968 // first def operand.
3969 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3970 UseClass, UseIdx))
3971 UseCycle = *UseCycle - 1;
3972 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3973 UseClass, UseIdx)) {
3974 UseCycle = *UseCycle - 1;
3975 }
3976 }
3977
3978 return UseCycle;
3979}
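// Worked example for the def/use combination above, using assumed stage
// numbers: a def ready in stage 4 feeding a use read in stage 2 yields an
// operand latency of 4 - 2 + 1 == 3 cycles, and one cycle less if the
// itinerary records a pipeline forward between the two operands.
static_assert(4 - 2 + 1 == 3, "def stage 4, use stage 2 -> latency 3");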
3980
3981static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3982                                           const MachineInstr *MI, unsigned Reg,
3983 unsigned &DefIdx, unsigned &Dist) {
3984 Dist = 0;
3985
3986  MachineBasicBlock::const_iterator I = MI; ++I;
3987  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
3988 assert(II->isInsideBundle() && "Empty bundle?");
3989
3990 int Idx = -1;
3991 while (II->isInsideBundle()) {
3992 Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
3993 if (Idx != -1)
3994 break;
3995 --II;
3996 ++Dist;
3997 }
3998
3999 assert(Idx != -1 && "Cannot find bundled definition!");
4000 DefIdx = Idx;
4001 return &*II;
4002}
4003
4004static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4005                                           const MachineInstr &MI, unsigned Reg,
4006 unsigned &UseIdx, unsigned &Dist) {
4007 Dist = 0;
4008
4009  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
4010  assert(II->isInsideBundle() && "Empty bundle?");
4011 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4012
4013 // FIXME: This doesn't properly handle multiple uses.
4014 int Idx = -1;
4015 while (II != E && II->isInsideBundle()) {
4016 Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
4017 if (Idx != -1)
4018 break;
4019 if (II->getOpcode() != ARM::t2IT)
4020 ++Dist;
4021 ++II;
4022 }
4023
4024 if (Idx == -1) {
4025 Dist = 0;
4026 return nullptr;
4027 }
4028
4029 UseIdx = Idx;
4030 return &*II;
4031}
4032
4033/// Return the number of cycles to add to (or subtract from) the static
4034/// itinerary based on the def opcode and alignment. The caller will ensure that
4035/// adjusted latency is at least one cycle.
4036static int adjustDefLatency(const ARMSubtarget &Subtarget,
4037 const MachineInstr &DefMI,
4038 const MCInstrDesc &DefMCID, unsigned DefAlign) {
4039 int Adjust = 0;
4040 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4041 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4042 // variants are one cycle cheaper.
4043 switch (DefMCID.getOpcode()) {
4044 default: break;
4045 case ARM::LDRrs:
4046 case ARM::LDRBrs: {
4047 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4048 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4049 if (ShImm == 0 ||
4050 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4051 --Adjust;
4052 break;
4053 }
4054 case ARM::t2LDRs:
4055 case ARM::t2LDRBs:
4056 case ARM::t2LDRHs:
4057 case ARM::t2LDRSHs: {
4058 // Thumb2 mode: lsl only.
4059 unsigned ShAmt = DefMI.getOperand(3).getImm();
4060 if (ShAmt == 0 || ShAmt == 2)
4061 --Adjust;
4062 break;
4063 }
4064 }
4065 } else if (Subtarget.isSwift()) {
4066 // FIXME: Properly handle all of the latency adjustments for address
4067 // writeback.
4068 switch (DefMCID.getOpcode()) {
4069 default: break;
4070 case ARM::LDRrs:
4071 case ARM::LDRBrs: {
4072 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4073 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4074 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4075 if (!isSub &&
4076 (ShImm == 0 ||
4077 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4078 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4079 Adjust -= 2;
4080 else if (!isSub &&
4081 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4082 --Adjust;
4083 break;
4084 }
4085 case ARM::t2LDRs:
4086 case ARM::t2LDRBs:
4087 case ARM::t2LDRHs:
4088 case ARM::t2LDRSHs: {
4089 // Thumb2 mode: lsl only.
4090 unsigned ShAmt = DefMI.getOperand(3).getImm();
4091 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4092 Adjust -= 2;
4093 break;
4094 }
4095 }
4096 }
4097
4098 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4099 switch (DefMCID.getOpcode()) {
4100 default: break;
4101 case ARM::VLD1q8:
4102 case ARM::VLD1q16:
4103 case ARM::VLD1q32:
4104 case ARM::VLD1q64:
4105 case ARM::VLD1q8wb_fixed:
4106 case ARM::VLD1q16wb_fixed:
4107 case ARM::VLD1q32wb_fixed:
4108 case ARM::VLD1q64wb_fixed:
4109 case ARM::VLD1q8wb_register:
4110 case ARM::VLD1q16wb_register:
4111 case ARM::VLD1q32wb_register:
4112 case ARM::VLD1q64wb_register:
4113 case ARM::VLD2d8:
4114 case ARM::VLD2d16:
4115 case ARM::VLD2d32:
4116 case ARM::VLD2q8:
4117 case ARM::VLD2q16:
4118 case ARM::VLD2q32:
4119 case ARM::VLD2d8wb_fixed:
4120 case ARM::VLD2d16wb_fixed:
4121 case ARM::VLD2d32wb_fixed:
4122 case ARM::VLD2q8wb_fixed:
4123 case ARM::VLD2q16wb_fixed:
4124 case ARM::VLD2q32wb_fixed:
4125 case ARM::VLD2d8wb_register:
4126 case ARM::VLD2d16wb_register:
4127 case ARM::VLD2d32wb_register:
4128 case ARM::VLD2q8wb_register:
4129 case ARM::VLD2q16wb_register:
4130 case ARM::VLD2q32wb_register:
4131 case ARM::VLD3d8:
4132 case ARM::VLD3d16:
4133 case ARM::VLD3d32:
4134 case ARM::VLD1d64T:
4135 case ARM::VLD3d8_UPD:
4136 case ARM::VLD3d16_UPD:
4137 case ARM::VLD3d32_UPD:
4138 case ARM::VLD1d64Twb_fixed:
4139 case ARM::VLD1d64Twb_register:
4140 case ARM::VLD3q8_UPD:
4141 case ARM::VLD3q16_UPD:
4142 case ARM::VLD3q32_UPD:
4143 case ARM::VLD4d8:
4144 case ARM::VLD4d16:
4145 case ARM::VLD4d32:
4146 case ARM::VLD1d64Q:
4147 case ARM::VLD4d8_UPD:
4148 case ARM::VLD4d16_UPD:
4149 case ARM::VLD4d32_UPD:
4150 case ARM::VLD1d64Qwb_fixed:
4151 case ARM::VLD1d64Qwb_register:
4152 case ARM::VLD4q8_UPD:
4153 case ARM::VLD4q16_UPD:
4154 case ARM::VLD4q32_UPD:
4155 case ARM::VLD1DUPq8:
4156 case ARM::VLD1DUPq16:
4157 case ARM::VLD1DUPq32:
4158 case ARM::VLD1DUPq8wb_fixed:
4159 case ARM::VLD1DUPq16wb_fixed:
4160 case ARM::VLD1DUPq32wb_fixed:
4161 case ARM::VLD1DUPq8wb_register:
4162 case ARM::VLD1DUPq16wb_register:
4163 case ARM::VLD1DUPq32wb_register:
4164 case ARM::VLD2DUPd8:
4165 case ARM::VLD2DUPd16:
4166 case ARM::VLD2DUPd32:
4167 case ARM::VLD2DUPd8wb_fixed:
4168 case ARM::VLD2DUPd16wb_fixed:
4169 case ARM::VLD2DUPd32wb_fixed:
4170 case ARM::VLD2DUPd8wb_register:
4171 case ARM::VLD2DUPd16wb_register:
4172 case ARM::VLD2DUPd32wb_register:
4173 case ARM::VLD4DUPd8:
4174 case ARM::VLD4DUPd16:
4175 case ARM::VLD4DUPd32:
4176 case ARM::VLD4DUPd8_UPD:
4177 case ARM::VLD4DUPd16_UPD:
4178 case ARM::VLD4DUPd32_UPD:
4179 case ARM::VLD1LNd8:
4180 case ARM::VLD1LNd16:
4181 case ARM::VLD1LNd32:
4182 case ARM::VLD1LNd8_UPD:
4183 case ARM::VLD1LNd16_UPD:
4184 case ARM::VLD1LNd32_UPD:
4185 case ARM::VLD2LNd8:
4186 case ARM::VLD2LNd16:
4187 case ARM::VLD2LNd32:
4188 case ARM::VLD2LNq16:
4189 case ARM::VLD2LNq32:
4190 case ARM::VLD2LNd8_UPD:
4191 case ARM::VLD2LNd16_UPD:
4192 case ARM::VLD2LNd32_UPD:
4193 case ARM::VLD2LNq16_UPD:
4194 case ARM::VLD2LNq32_UPD:
4195 case ARM::VLD4LNd8:
4196 case ARM::VLD4LNd16:
4197 case ARM::VLD4LNd32:
4198 case ARM::VLD4LNq16:
4199 case ARM::VLD4LNq32:
4200 case ARM::VLD4LNd8_UPD:
4201 case ARM::VLD4LNd16_UPD:
4202 case ARM::VLD4LNd32_UPD:
4203 case ARM::VLD4LNq16_UPD:
4204 case ARM::VLD4LNq32_UPD:
4205 // If the address is not 64-bit aligned, the latencies of these
4206      // instructions increase by one.
4207 ++Adjust;
4208 break;
4209 }
4210 }
4211 return Adjust;
4212}
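// A minimal sketch of the clamping the callers below apply to this
// adjustment, using a hypothetical helper name: a negative Adjust is only
// added while the result stays positive, so a 1-cycle itinerary latency with
// a -2 Swift adjustment is still reported as 1, never as a negative value.
static unsigned applyDefLatencyAdjustment(unsigned Latency, int Adjust) {
  return (Adjust >= 0 || (int)Latency > -Adjust) ? Latency + Adjust : Latency;
}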
4213
4214std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4215    const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4216 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4217 // No operand latency. The caller may fall back to getInstrLatency.
4218 if (!ItinData || ItinData->isEmpty())
4219 return std::nullopt;
4220
4221 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4222 Register Reg = DefMO.getReg();
4223
4224 const MachineInstr *ResolvedDefMI = &DefMI;
4225 unsigned DefAdj = 0;
4226 if (DefMI.isBundle())
4227 ResolvedDefMI =
4228 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4229 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4230 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4231 return 1;
4232 }
4233
4234 const MachineInstr *ResolvedUseMI = &UseMI;
4235 unsigned UseAdj = 0;
4236 if (UseMI.isBundle()) {
4237 ResolvedUseMI =
4238 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4239 if (!ResolvedUseMI)
4240 return std::nullopt;
4241 }
4242
4243 return getOperandLatencyImpl(
4244 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4245 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4246}
4247
4248std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4249 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4250 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4251 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4252 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4253 if (Reg == ARM::CPSR) {
4254 if (DefMI.getOpcode() == ARM::FMSTAT) {
4255 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4256 return Subtarget.isLikeA9() ? 1 : 20;
4257 }
4258
4259 // CPSR set and branch can be paired in the same cycle.
4260 if (UseMI.isBranch())
4261 return 0;
4262
4263 // Otherwise it takes the instruction latency (generally one).
4264 unsigned Latency = getInstrLatency(ItinData, DefMI);
4265
4266      // For Thumb2 and -Os, prefer scheduling a CPSR-setting instruction close
4267      // to its uses. Instructions which are otherwise scheduled between them may
4268      // incur a code size penalty (they would not be able to use the
4269      // CPSR-setting 16-bit encodings).
4270 if (Latency > 0 && Subtarget.isThumb2()) {
4271 const MachineFunction *MF = DefMI.getParent()->getParent();
4272 if (MF->getFunction().hasOptSize())
4273 --Latency;
4274 }
4275 return Latency;
4276 }
4277
4278 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4279 return std::nullopt;
4280
4281 unsigned DefAlign = DefMI.hasOneMemOperand()
4282 ? (*DefMI.memoperands_begin())->getAlign().value()
4283 : 0;
4284 unsigned UseAlign = UseMI.hasOneMemOperand()
4285 ? (*UseMI.memoperands_begin())->getAlign().value()
4286 : 0;
4287
4288 // Get the itinerary's latency if possible, and handle variable_ops.
4289 std::optional<unsigned> Latency = getOperandLatency(
4290 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4291 // Unable to find operand latency. The caller may resort to getInstrLatency.
4292 if (!Latency)
4293 return std::nullopt;
4294
4295 // Adjust for IT block position.
4296 int Adj = DefAdj + UseAdj;
4297
4298 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4299 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4300 if (Adj >= 0 || (int)*Latency > -Adj) {
4301 return *Latency + Adj;
4302 }
4303 // Return the itinerary latency, which may be zero but not less than zero.
4304 return Latency;
4305}
4306
4307std::optional<unsigned>
4308ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4309                                    SDNode *DefNode, unsigned DefIdx,
4310 SDNode *UseNode, unsigned UseIdx) const {
4311 if (!DefNode->isMachineOpcode())
4312 return 1;
4313
4314 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4315
4316 if (isZeroCost(DefMCID.Opcode))
4317 return 0;
4318
4319 if (!ItinData || ItinData->isEmpty())
4320 return DefMCID.mayLoad() ? 3 : 1;
4321
4322 if (!UseNode->isMachineOpcode()) {
4323 std::optional<unsigned> Latency =
4324 ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4325 int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4326 int Threshold = 1 + Adj;
4327 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4328 }
4329
4330 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4331 auto *DefMN = cast<MachineSDNode>(DefNode);
4332 unsigned DefAlign = !DefMN->memoperands_empty()
4333 ? (*DefMN->memoperands_begin())->getAlign().value()
4334 : 0;
4335 auto *UseMN = cast<MachineSDNode>(UseNode);
4336 unsigned UseAlign = !UseMN->memoperands_empty()
4337 ? (*UseMN->memoperands_begin())->getAlign().value()
4338 : 0;
4339 std::optional<unsigned> Latency = getOperandLatency(
4340 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4341 if (!Latency)
4342 return std::nullopt;
4343
4344 if (Latency > 1U &&
4345 (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4346 Subtarget.isCortexA7())) {
4347 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4348 // variants are one cycle cheaper.
4349 switch (DefMCID.getOpcode()) {
4350 default: break;
4351 case ARM::LDRrs:
4352 case ARM::LDRBrs: {
4353 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4354 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4355 if (ShImm == 0 ||
4356 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4357 Latency = *Latency - 1;
4358 break;
4359 }
4360 case ARM::t2LDRs:
4361 case ARM::t2LDRBs:
4362 case ARM::t2LDRHs:
4363 case ARM::t2LDRSHs: {
4364 // Thumb2 mode: lsl only.
4365 unsigned ShAmt = DefNode->getConstantOperandVal(2);
4366 if (ShAmt == 0 || ShAmt == 2)
4367 Latency = *Latency - 1;
4368 break;
4369 }
4370 }
4371 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4372 // FIXME: Properly handle all of the latency adjustments for address
4373 // writeback.
4374 switch (DefMCID.getOpcode()) {
4375 default: break;
4376 case ARM::LDRrs:
4377 case ARM::LDRBrs: {
4378 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4379 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4380 if (ShImm == 0 ||
4381 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4382              ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4383        Latency = *Latency - 2;
4384 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4385 Latency = *Latency - 1;
4386 break;
4387 }
4388 case ARM::t2LDRs:
4389 case ARM::t2LDRBs:
4390 case ARM::t2LDRHs:
4391 case ARM::t2LDRSHs:
4392 // Thumb2 mode: lsl 0-3 only.
4393 Latency = *Latency - 2;
4394 break;
4395 }
4396 }
4397
4398 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4399 switch (DefMCID.getOpcode()) {
4400 default: break;
4401 case ARM::VLD1q8:
4402 case ARM::VLD1q16:
4403 case ARM::VLD1q32:
4404 case ARM::VLD1q64:
4405 case ARM::VLD1q8wb_register:
4406 case ARM::VLD1q16wb_register:
4407 case ARM::VLD1q32wb_register:
4408 case ARM::VLD1q64wb_register:
4409 case ARM::VLD1q8wb_fixed:
4410 case ARM::VLD1q16wb_fixed:
4411 case ARM::VLD1q32wb_fixed:
4412 case ARM::VLD1q64wb_fixed:
4413 case ARM::VLD2d8:
4414 case ARM::VLD2d16:
4415 case ARM::VLD2d32:
4416 case ARM::VLD2q8Pseudo:
4417 case ARM::VLD2q16Pseudo:
4418 case ARM::VLD2q32Pseudo:
4419 case ARM::VLD2d8wb_fixed:
4420 case ARM::VLD2d16wb_fixed:
4421 case ARM::VLD2d32wb_fixed:
4422 case ARM::VLD2q8PseudoWB_fixed:
4423 case ARM::VLD2q16PseudoWB_fixed:
4424 case ARM::VLD2q32PseudoWB_fixed:
4425 case ARM::VLD2d8wb_register:
4426 case ARM::VLD2d16wb_register:
4427 case ARM::VLD2d32wb_register:
4428 case ARM::VLD2q8PseudoWB_register:
4429 case ARM::VLD2q16PseudoWB_register:
4430 case ARM::VLD2q32PseudoWB_register:
4431 case ARM::VLD3d8Pseudo:
4432 case ARM::VLD3d16Pseudo:
4433 case ARM::VLD3d32Pseudo:
4434 case ARM::VLD1d8TPseudo:
4435 case ARM::VLD1d16TPseudo:
4436 case ARM::VLD1d32TPseudo:
4437 case ARM::VLD1d64TPseudo:
4438 case ARM::VLD1d64TPseudoWB_fixed:
4439 case ARM::VLD1d64TPseudoWB_register:
4440 case ARM::VLD3d8Pseudo_UPD:
4441 case ARM::VLD3d16Pseudo_UPD:
4442 case ARM::VLD3d32Pseudo_UPD:
4443 case ARM::VLD3q8Pseudo_UPD:
4444 case ARM::VLD3q16Pseudo_UPD:
4445 case ARM::VLD3q32Pseudo_UPD:
4446 case ARM::VLD3q8oddPseudo:
4447 case ARM::VLD3q16oddPseudo:
4448 case ARM::VLD3q32oddPseudo:
4449 case ARM::VLD3q8oddPseudo_UPD:
4450 case ARM::VLD3q16oddPseudo_UPD:
4451 case ARM::VLD3q32oddPseudo_UPD:
4452 case ARM::VLD4d8Pseudo:
4453 case ARM::VLD4d16Pseudo:
4454 case ARM::VLD4d32Pseudo:
4455 case ARM::VLD1d8QPseudo:
4456 case ARM::VLD1d16QPseudo:
4457 case ARM::VLD1d32QPseudo:
4458 case ARM::VLD1d64QPseudo:
4459 case ARM::VLD1d64QPseudoWB_fixed:
4460 case ARM::VLD1d64QPseudoWB_register:
4461 case ARM::VLD1q8HighQPseudo:
4462 case ARM::VLD1q8LowQPseudo_UPD:
4463 case ARM::VLD1q8HighTPseudo:
4464 case ARM::VLD1q8LowTPseudo_UPD:
4465 case ARM::VLD1q16HighQPseudo:
4466 case ARM::VLD1q16LowQPseudo_UPD:
4467 case ARM::VLD1q16HighTPseudo:
4468 case ARM::VLD1q16LowTPseudo_UPD:
4469 case ARM::VLD1q32HighQPseudo:
4470 case ARM::VLD1q32LowQPseudo_UPD:
4471 case ARM::VLD1q32HighTPseudo:
4472 case ARM::VLD1q32LowTPseudo_UPD:
4473 case ARM::VLD1q64HighQPseudo:
4474 case ARM::VLD1q64LowQPseudo_UPD:
4475 case ARM::VLD1q64HighTPseudo:
4476 case ARM::VLD1q64LowTPseudo_UPD:
4477 case ARM::VLD4d8Pseudo_UPD:
4478 case ARM::VLD4d16Pseudo_UPD:
4479 case ARM::VLD4d32Pseudo_UPD:
4480 case ARM::VLD4q8Pseudo_UPD:
4481 case ARM::VLD4q16Pseudo_UPD:
4482 case ARM::VLD4q32Pseudo_UPD:
4483 case ARM::VLD4q8oddPseudo:
4484 case ARM::VLD4q16oddPseudo:
4485 case ARM::VLD4q32oddPseudo:
4486 case ARM::VLD4q8oddPseudo_UPD:
4487 case ARM::VLD4q16oddPseudo_UPD:
4488 case ARM::VLD4q32oddPseudo_UPD:
4489 case ARM::VLD1DUPq8:
4490 case ARM::VLD1DUPq16:
4491 case ARM::VLD1DUPq32:
4492 case ARM::VLD1DUPq8wb_fixed:
4493 case ARM::VLD1DUPq16wb_fixed:
4494 case ARM::VLD1DUPq32wb_fixed:
4495 case ARM::VLD1DUPq8wb_register:
4496 case ARM::VLD1DUPq16wb_register:
4497 case ARM::VLD1DUPq32wb_register:
4498 case ARM::VLD2DUPd8:
4499 case ARM::VLD2DUPd16:
4500 case ARM::VLD2DUPd32:
4501 case ARM::VLD2DUPd8wb_fixed:
4502 case ARM::VLD2DUPd16wb_fixed:
4503 case ARM::VLD2DUPd32wb_fixed:
4504 case ARM::VLD2DUPd8wb_register:
4505 case ARM::VLD2DUPd16wb_register:
4506 case ARM::VLD2DUPd32wb_register:
4507 case ARM::VLD2DUPq8EvenPseudo:
4508 case ARM::VLD2DUPq8OddPseudo:
4509 case ARM::VLD2DUPq16EvenPseudo:
4510 case ARM::VLD2DUPq16OddPseudo:
4511 case ARM::VLD2DUPq32EvenPseudo:
4512 case ARM::VLD2DUPq32OddPseudo:
4513 case ARM::VLD3DUPq8EvenPseudo:
4514 case ARM::VLD3DUPq8OddPseudo:
4515 case ARM::VLD3DUPq16EvenPseudo:
4516 case ARM::VLD3DUPq16OddPseudo:
4517 case ARM::VLD3DUPq32EvenPseudo:
4518 case ARM::VLD3DUPq32OddPseudo:
4519 case ARM::VLD4DUPd8Pseudo:
4520 case ARM::VLD4DUPd16Pseudo:
4521 case ARM::VLD4DUPd32Pseudo:
4522 case ARM::VLD4DUPd8Pseudo_UPD:
4523 case ARM::VLD4DUPd16Pseudo_UPD:
4524 case ARM::VLD4DUPd32Pseudo_UPD:
4525 case ARM::VLD4DUPq8EvenPseudo:
4526 case ARM::VLD4DUPq8OddPseudo:
4527 case ARM::VLD4DUPq16EvenPseudo:
4528 case ARM::VLD4DUPq16OddPseudo:
4529 case ARM::VLD4DUPq32EvenPseudo:
4530 case ARM::VLD4DUPq32OddPseudo:
4531 case ARM::VLD1LNq8Pseudo:
4532 case ARM::VLD1LNq16Pseudo:
4533 case ARM::VLD1LNq32Pseudo:
4534 case ARM::VLD1LNq8Pseudo_UPD:
4535 case ARM::VLD1LNq16Pseudo_UPD:
4536 case ARM::VLD1LNq32Pseudo_UPD:
4537 case ARM::VLD2LNd8Pseudo:
4538 case ARM::VLD2LNd16Pseudo:
4539 case ARM::VLD2LNd32Pseudo:
4540 case ARM::VLD2LNq16Pseudo:
4541 case ARM::VLD2LNq32Pseudo:
4542 case ARM::VLD2LNd8Pseudo_UPD:
4543 case ARM::VLD2LNd16Pseudo_UPD:
4544 case ARM::VLD2LNd32Pseudo_UPD:
4545 case ARM::VLD2LNq16Pseudo_UPD:
4546 case ARM::VLD2LNq32Pseudo_UPD:
4547 case ARM::VLD4LNd8Pseudo:
4548 case ARM::VLD4LNd16Pseudo:
4549 case ARM::VLD4LNd32Pseudo:
4550 case ARM::VLD4LNq16Pseudo:
4551 case ARM::VLD4LNq32Pseudo:
4552 case ARM::VLD4LNd8Pseudo_UPD:
4553 case ARM::VLD4LNd16Pseudo_UPD:
4554 case ARM::VLD4LNd32Pseudo_UPD:
4555 case ARM::VLD4LNq16Pseudo_UPD:
4556 case ARM::VLD4LNq32Pseudo_UPD:
4557 // If the address is not 64-bit aligned, the latencies of these
4558      // instructions increase by one.
4559 Latency = *Latency + 1;
4560 break;
4561 }
4562
4563 return Latency;
4564}
4565
4566unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4567 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4568 MI.isImplicitDef())
4569 return 0;
4570
4571 if (MI.isBundle())
4572 return 0;
4573
4574 const MCInstrDesc &MCID = MI.getDesc();
4575
4576 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4577 !Subtarget.cheapPredicableCPSRDef())) {
4578 // When predicated, CPSR is an additional source operand for CPSR updating
4579    // instructions; this apparently increases their latencies.
4580 return 1;
4581 }
4582 return 0;
4583}
4584
4585unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4586 const MachineInstr &MI,
4587 unsigned *PredCost) const {
4588 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4589 MI.isImplicitDef())
4590 return 1;
4591
4592 // An instruction scheduler typically runs on unbundled instructions, however
4593 // other passes may query the latency of a bundled instruction.
4594 if (MI.isBundle()) {
4595 unsigned Latency = 0;
4596    MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4597    MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4598 while (++I != E && I->isInsideBundle()) {
4599 if (I->getOpcode() != ARM::t2IT)
4600 Latency += getInstrLatency(ItinData, *I, PredCost);
4601 }
4602 return Latency;
4603 }
4604
4605 const MCInstrDesc &MCID = MI.getDesc();
4606 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4607 !Subtarget.cheapPredicableCPSRDef()))) {
4608 // When predicated, CPSR is an additional source operand for CPSR updating
4609    // instructions; this apparently increases their latencies.
4610 *PredCost = 1;
4611 }
4612 // Be sure to call getStageLatency for an empty itinerary in case it has a
4613 // valid MinLatency property.
4614 if (!ItinData)
4615 return MI.mayLoad() ? 3 : 1;
4616
4617 unsigned Class = MCID.getSchedClass();
4618
4619 // For instructions with variable uops, use uops as latency.
4620 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4621 return getNumMicroOps(ItinData, MI);
4622
4623 // For the common case, fall back on the itinerary's latency.
4624 unsigned Latency = ItinData->getStageLatency(Class);
4625
4626 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4627 unsigned DefAlign =
4628 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4629 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4630 if (Adj >= 0 || (int)Latency > -Adj) {
4631 return Latency + Adj;
4632 }
4633 return Latency;
4634}
4635
4636unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4637 SDNode *Node) const {
4638 if (!Node->isMachineOpcode())
4639 return 1;
4640
4641 if (!ItinData || ItinData->isEmpty())
4642 return 1;
4643
4644 unsigned Opcode = Node->getMachineOpcode();
4645 switch (Opcode) {
4646 default:
4647 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4648 case ARM::VLDMQIA:
4649 case ARM::VSTMQIA:
4650 return 2;
4651 }
4652}
4653
4654bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4655 const MachineRegisterInfo *MRI,
4656 const MachineInstr &DefMI,
4657 unsigned DefIdx,
4658 const MachineInstr &UseMI,
4659 unsigned UseIdx) const {
4660 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4661 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4662 if (Subtarget.nonpipelinedVFP() &&
4663 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4664 return true;
4665
4666 // Hoist VFP / NEON instructions with 4 or higher latency.
4667 unsigned Latency =
4668 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4669 if (Latency <= 3)
4670 return false;
4671 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4672 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4673}
4674
4675bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4676 const MachineInstr &DefMI,
4677 unsigned DefIdx) const {
4678 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4679 if (!ItinData || ItinData->isEmpty())
4680 return false;
4681
4682 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4683 if (DDomain == ARMII::DomainGeneral) {
4684 unsigned DefClass = DefMI.getDesc().getSchedClass();
4685 std::optional<unsigned> DefCycle =
4686 ItinData->getOperandCycle(DefClass, DefIdx);
4687 return DefCycle && DefCycle <= 2U;
4688 }
4689 return false;
4690}
4691
4692bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4693 StringRef &ErrInfo) const {
4694 if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4695 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4696 return false;
4697 }
4698 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4699 // Make sure we don't generate a lo-lo mov that isn't supported.
4700 if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4701 !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4702 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4703 return false;
4704 }
4705 }
4706 if (MI.getOpcode() == ARM::tPUSH ||
4707 MI.getOpcode() == ARM::tPOP ||
4708 MI.getOpcode() == ARM::tPOP_RET) {
4709 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4710 if (MO.isImplicit() || !MO.isReg())
4711 continue;
4712 Register Reg = MO.getReg();
4713 if (Reg < ARM::R0 || Reg > ARM::R7) {
4714 if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4715 !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4716 ErrInfo = "Unsupported register in Thumb1 push/pop";
4717 return false;
4718 }
4719 }
4720 }
4721 }
4722 if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4723 assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4724 if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4725 MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4726 ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4727 return false;
4728 }
4729 }
4730
4731 // Check the address model by taking the first Imm operand and checking it is
4732 // legal for that addressing mode.
4733  const ARMII::AddrMode AddrMode =
4734      (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4735 switch (AddrMode) {
4736 default:
4737 break;
4738  case ARMII::AddrModeT2_i7:
4739  case ARMII::AddrModeT2_i7s2:
4740  case ARMII::AddrModeT2_i7s4:
4741  case ARMII::AddrModeT2_i8:
4742  case ARMII::AddrModeT2_i8pos:
4743  case ARMII::AddrModeT2_i8neg:
4744  case ARMII::AddrModeT2_i8s4:
4745  case ARMII::AddrModeT2_i12: {
4746 uint32_t Imm = 0;
4747 for (auto Op : MI.operands()) {
4748 if (Op.isImm()) {
4749 Imm = Op.getImm();
4750 break;
4751 }
4752 }
4753 if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4754 ErrInfo = "Incorrect AddrMode Imm for instruction";
4755 return false;
4756 }
4757 break;
4758 }
4759 }
4760 return true;
4761}
4762
4763void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4764                                                unsigned LoadImmOpc,
4765 unsigned LoadOpc) const {
4766 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4767 "ROPI/RWPI not currently supported with stack guard");
4768
4769 MachineBasicBlock &MBB = *MI->getParent();
4770 DebugLoc DL = MI->getDebugLoc();
4771 Register Reg = MI->getOperand(0).getReg();
4772  MachineInstrBuilder MIB;
4773  unsigned int Offset = 0;
4774
4775 if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4776 assert(!Subtarget.isReadTPSoft() &&
4777 "TLS stack protector requires hardware TLS register");
4778
4779 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4780 .addImm(15)
4781 .addImm(0)
4782 .addImm(13)
4783 .addImm(0)
4784 .addImm(3)
4786
4787 Module &M = *MBB.getParent()->getFunction().getParent();
4788 Offset = M.getStackProtectorGuardOffset();
4789 if (Offset & ~0xfffU) {
4790 // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4791 // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4792 // bits, resulting in a range of 0 to +1 MiB for the guard offset.
4793 unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4794 BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4795 .addReg(Reg, RegState::Kill)
4796 .addImm(Offset & ~0xfffU)
4798 .addReg(0);
4799 Offset &= 0xfffU;
4800 }
4801 } else {
4802 const GlobalValue *GV =
4803 cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4804 bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4805
4806 unsigned TargetFlags = ARMII::MO_NO_FLAG;
4807 if (Subtarget.isTargetMachO()) {
4808 TargetFlags |= ARMII::MO_NONLAZY;
4809 } else if (Subtarget.isTargetCOFF()) {
4810 if (GV->hasDLLImportStorageClass())
4811 TargetFlags |= ARMII::MO_DLLIMPORT;
4812 else if (IsIndirect)
4813 TargetFlags |= ARMII::MO_COFFSTUB;
4814 } else if (IsIndirect) {
4815 TargetFlags |= ARMII::MO_GOT;
4816 }
4817
4818 if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
4819 Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4820 auto APSREncoding =
4821 ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4822 BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
4823 .addImm(APSREncoding)
4825 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4826 .addGlobalAddress(GV, 0, TargetFlags);
4827 BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
4828 .addImm(APSREncoding)
4829 .addReg(CPSRSaveReg, RegState::Kill)
4831 } else {
4832 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4833 .addGlobalAddress(GV, 0, TargetFlags);
4834 }
4835
4836 if (IsIndirect) {
4837 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4838 MIB.addReg(Reg, RegState::Kill).addImm(0);
4839 auto Flags = MachineMemOperand::MOLoad |
4840                   MachineMemOperand::MODereferenceable |
4841                   MachineMemOperand::MOInvariant;
4842      MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4843 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4845 }
4846 }
4847
4848 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4849 MIB.addReg(Reg, RegState::Kill)
4850 .addImm(Offset)
4851 .cloneMemRefs(*MI)
4853}
4854
4855bool
4856ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4857 unsigned &AddSubOpc,
4858 bool &NegAcc, bool &HasLane) const {
4859 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4860 if (I == MLxEntryMap.end())
4861 return false;
4862
4863 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4864 MulOpc = Entry.MulOpc;
4865 AddSubOpc = Entry.AddSubOpc;
4866 NegAcc = Entry.NegAcc;
4867 HasLane = Entry.HasLane;
4868 return true;
4869}
4870
4871//===----------------------------------------------------------------------===//
4872// Execution domains.
4873//===----------------------------------------------------------------------===//
4874//
4875// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4876// and some can go down both. The vmov instructions go down the VFP pipeline,
4877// but they can be changed to vorr equivalents that are executed by the NEON
4878// pipeline.
4879//
4880// We use the following execution domain numbering:
4881//
4882enum ARMExeDomain {
4883  ExeGeneric = 0,
4884  ExeVFP = 1,
4885  ExeNEON = 2
4886};
4887
4888//
4889// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4890//
4891std::pair<uint16_t, uint16_t>
4892ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4893  // If we don't have access to NEON instructions then we won't be able
4894 // to swizzle anything to the NEON domain. Check to make sure.
4895 if (Subtarget.hasNEON()) {
4896 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4897 // if they are not predicated.
4898 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4899 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4900
4901 // CortexA9 is particularly picky about mixing the two and wants these
4902 // converted.
4903 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4904 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4905 MI.getOpcode() == ARM::VMOVS))
4906 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4907 }
4908 // No other instructions can be swizzled, so just determine their domain.
4909 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4910
4911  if (Domain & ARMII::DomainNEON)
4912    return std::make_pair(ExeNEON, 0);
4913
4914 // Certain instructions can go either way on Cortex-A8.
4915 // Treat them as NEON instructions.
4916 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4917 return std::make_pair(ExeNEON, 0);
4918
4919  if (Domain & ARMII::DomainVFP)
4920    return std::make_pair(ExeVFP, 0);
4921
4922 return std::make_pair(ExeGeneric, 0);
4923}
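// A small illustration of the pair returned above, assuming the ARMExeDomain
// numbering: for an unpredicated VMOVD the current domain is ExeVFP (1) and
// the mask of legal domains has both the VFP and NEON bits set, which is what
// lets setExecutionDomain below retarget the copy to VORRd.
static_assert(((1u << 1) | (1u << 2)) == 0x6u,
              "VFP|NEON legal-domain mask for an unpredicated VMOVD");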
4924
4925static MCRegister getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4926                                              unsigned SReg, unsigned &Lane) {
4927 MCRegister DReg =
4928 TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4929 Lane = 0;
4930
4931 if (DReg)
4932 return DReg;
4933
4934 Lane = 1;
4935 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4936
4937 assert(DReg && "S-register with no D super-register?");
4938 return DReg;
4939}
4940
4941/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4942/// set ImplicitSReg to a register number that must be marked as implicit-use or
4943/// to zero if no register needs to be marked as implicit-use.
4944///
4945/// If the function cannot determine if an SPR should be marked implicit use or
4946/// not, it returns false.
4947///
4948/// This function handles cases where an instruction is being modified from taking
4949/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4950/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4951/// lane of the DPR).
4952///
4953/// If the other SPR is defined, an implicit-use of it should be added. Else,
4954/// (including the case where the DPR itself is defined), it should not.
4955///
4956static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4957                                       MachineInstr &MI, MCRegister DReg,
4958 unsigned Lane,
4959 MCRegister &ImplicitSReg) {
4960 // If the DPR is defined or used already, the other SPR lane will be chained
4961 // correctly, so there is nothing to be done.
4962 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4963 ImplicitSReg = MCRegister();
4964 return true;
4965 }
4966
4967 // Otherwise we need to go searching to see if the SPR is set explicitly.
4968 ImplicitSReg = TRI->getSubReg(DReg,
4969 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4970  MachineBasicBlock::LivenessQueryResult LQR =
4971      MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4972
4973 if (LQR == MachineBasicBlock::LQR_Live)
4974 return true;
4975 else if (LQR == MachineBasicBlock::LQR_Unknown)
4976 return false;
4977
4978 // If the register is known not to be live, there is no need to add an
4979 // implicit-use.
4980 ImplicitSReg = MCRegister();
4981 return true;
4982}
4983
4984void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4985                                          unsigned Domain) const {
4986 unsigned DstReg, SrcReg;
4987 MCRegister DReg;
4988 unsigned Lane;
4989 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4990  const TargetRegisterInfo *TRI = &getRegisterInfo();
4991  switch (MI.getOpcode()) {
4992 default:
4993 llvm_unreachable("cannot handle opcode!");
4994 break;
4995 case ARM::VMOVD:
4996 if (Domain != ExeNEON)
4997 break;
4998
4999 // Zap the predicate operands.
5000 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
5001
5002 // Make sure we've got NEON instructions.
5003 assert(Subtarget.hasNEON() && "VORRd requires NEON");
5004
5005 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
5006 DstReg = MI.getOperand(0).getReg();
5007 SrcReg = MI.getOperand(1).getReg();
5008
5009 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5010 MI.removeOperand(i - 1);
5011
5012 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
5013 MI.setDesc(get(ARM::VORRd));
5014 MIB.addReg(DstReg, RegState::Define)
5015 .addReg(SrcReg)
5016 .addReg(SrcReg)
5018 break;
5019 case ARM::VMOVRS:
5020 if (Domain != ExeNEON)
5021 break;
5022 assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5023
5024 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5025 DstReg = MI.getOperand(0).getReg();
5026 SrcReg = MI.getOperand(1).getReg();
5027
5028 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5029 MI.removeOperand(i - 1);
5030
5031 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5032
5033 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5034 // Note that DSrc has been widened and the other lane may be undef, which
5035 // contaminates the entire register.
5036 MI.setDesc(get(ARM::VGETLNi32));
5037 MIB.addReg(DstReg, RegState::Define)
5038 .addReg(DReg, RegState::Undef)
5039 .addImm(Lane)
5041
5042 // The old source should be an implicit use, otherwise we might think it
5043 // was dead before here.
5044 MIB.addReg(SrcReg, RegState::Implicit);
5045 break;
5046 case ARM::VMOVSR: {
5047 if (Domain != ExeNEON)
5048 break;
5049 assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
5050
5051 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
5052 DstReg = MI.getOperand(0).getReg();
5053 SrcReg = MI.getOperand(1).getReg();
5054
5055 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
5056
5057 MCRegister ImplicitSReg;
5058 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
5059 break;
5060
5061 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5062 MI.removeOperand(i - 1);
5063
5064 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
5065 // Again DDst may be undefined at the beginning of this instruction.
5066 MI.setDesc(get(ARM::VSETLNi32));
5067 MIB.addReg(DReg, RegState::Define)
5068 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
5069 .addReg(SrcReg)
5070 .addImm(Lane)
5072
5073 // The narrower destination must be marked as set to keep previous chains
5074 // in place.
5075    MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
5076    if (ImplicitSReg)
5077 MIB.addReg(ImplicitSReg, RegState::Implicit);
5078 break;
5079 }
5080 case ARM::VMOVS: {
5081 if (Domain != ExeNEON)
5082 break;
5083
5084 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
5085 DstReg = MI.getOperand(0).getReg();
5086 SrcReg = MI.getOperand(1).getReg();
5087
5088 unsigned DstLane = 0, SrcLane = 0;
5089 MCRegister DDst, DSrc;
5090 DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
5091 DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
5092
5093 MCRegister ImplicitSReg;
5094 if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
5095 break;
5096
5097 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5098 MI.removeOperand(i - 1);
5099
5100 if (DSrc == DDst) {
5101 // Destination can be:
5102 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
5103 MI.setDesc(get(ARM::VDUPLN32d));
5104 MIB.addReg(DDst, RegState::Define)
5105 .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
5106 .addImm(SrcLane)
5108
5109      // Neither the source nor the destination is naturally represented any
5110      // more, so add them in manually.
5111      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
5112      MIB.addReg(SrcReg, RegState::Implicit);
5113 if (ImplicitSReg)
5114 MIB.addReg(ImplicitSReg, RegState::Implicit);
5115 break;
5116 }
5117
5118 // In general there's no single instruction that can perform an S <-> S
5119 // move in NEON space, but a pair of VEXT instructions *can* do the
5120 // job. It turns out that the VEXTs needed will only use DSrc once, with
5121 // the position based purely on the combination of lane-0 and lane-1
5122 // involved. For example
5123 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5124 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5125 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5126 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5127 //
5128 // Pattern of the MachineInstrs is:
5129 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
5130 MachineInstrBuilder NewMIB;
5131 NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
5132 DDst);
5133
5134 // On the first instruction, both DSrc and DDst may be undef if present.
5135 // Specifically when the original instruction didn't have them as an
5136 // <imp-use>.
5137 MCRegister CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
5138 bool CurUndef = !MI.readsRegister(CurReg, TRI);
5139 NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
5140
5141 CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
5142 CurUndef = !MI.readsRegister(CurReg, TRI);
5143 NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
5144 .addImm(1)
5146
5147 if (SrcLane == DstLane)
5148 NewMIB.addReg(SrcReg, RegState::Implicit);
5149
5150 MI.setDesc(get(ARM::VEXTd32));
5151 MIB.addReg(DDst, RegState::Define);
5152
5153 // On the second instruction, DDst has definitely been defined above, so
5154 // it is not undef. DSrc, if present, can be undef as above.
5155 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
5156 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5157 MIB.addReg(CurReg, getUndefRegState(CurUndef));
5158
5159 CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
5160 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5161 MIB.addReg(CurReg, getUndefRegState(CurUndef))
5162 .addImm(1)
5164
5165 if (SrcLane != DstLane)
5166 MIB.addReg(SrcReg, RegState::Implicit);
5167
5168 // As before, the original destination is no longer represented, add it
5169 // implicitly.
5170    MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
5171    if (ImplicitSReg != 0)
5172 MIB.addReg(ImplicitSReg, RegState::Implicit);
5173 break;
5174 }
5175 }
5176}
5177
5178//===----------------------------------------------------------------------===//
5179// Partial register updates
5180//===----------------------------------------------------------------------===//
5181//
5182// Swift renames NEON registers with 64-bit granularity. That means any
5183// instruction writing an S-reg implicitly reads the containing D-reg. The
5184// problem is mostly avoided by translating f32 operations to v2f32 operations
5185// on D-registers, but f32 loads are still a problem.
5186//
5187// These instructions can load an f32 into a NEON register:
5188//
5189// VLDRS - Only writes S, partial D update.
5190// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5191// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5192//
5193// FCONSTD can be used as a dependency-breaking instruction.
5194unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
5195    const MachineInstr &MI, unsigned OpNum,
5196 const TargetRegisterInfo *TRI) const {
5197 auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
5198 if (!PartialUpdateClearance)
5199 return 0;
5200
5201 assert(TRI && "Need TRI instance");
5202
5203 const MachineOperand &MO = MI.getOperand(OpNum);
5204 if (MO.readsReg())
5205 return 0;
5206 Register Reg = MO.getReg();
5207 int UseOp = -1;
5208
5209 switch (MI.getOpcode()) {
5210 // Normal instructions writing only an S-register.
5211 case ARM::VLDRS:
5212 case ARM::FCONSTS:
5213 case ARM::VMOVSR:
5214 case ARM::VMOVv8i8:
5215 case ARM::VMOVv4i16:
5216 case ARM::VMOVv2i32:
5217 case ARM::VMOVv2f32:
5218 case ARM::VMOVv1i64:
5219 UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
5220 break;
5221
5222 // Explicitly reads the dependency.
5223 case ARM::VLD1LNd32:
5224 UseOp = 3;
5225 break;
5226 default:
5227 return 0;
5228 }
5229
5230 // If this instruction actually reads a value from Reg, there is no unwanted
5231 // dependency.
5232 if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5233 return 0;
5234
5235 // We must be able to clobber the whole D-reg.
5236 if (Reg.isVirtual()) {
5237 // Virtual register must be a def undef foo:ssub_0 operand.
5238 if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
5239 return 0;
5240 } else if (ARM::SPRRegClass.contains(Reg)) {
5241 // Physical register: MI must define the full D-reg.
5242 MCRegister DReg =
5243 TRI->getMatchingSuperReg(Reg, ARM::ssub_0, &ARM::DPRRegClass);
5244 if (!DReg || !MI.definesRegister(DReg, TRI))
5245 return 0;
5246 }
5247
5248 // MI has an unwanted D-register dependency.
5249  // Avoid defs in the previous N instructions.
5250 return PartialUpdateClearance;
5251}
5252
5253// Break a partial register dependency after getPartialRegUpdateClearance
5254// returned non-zero.
5255void ARMBaseInstrInfo::breakPartialRegDependency(
5256    MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5257 assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
5258 assert(TRI && "Need TRI instance");
5259
5260 const MachineOperand &MO = MI.getOperand(OpNum);
5261 Register Reg = MO.getReg();
5262 assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5263 unsigned DReg = Reg;
5264
5265 // If MI defines an S-reg, find the corresponding D super-register.
5266 if (ARM::SPRRegClass.contains(Reg)) {
5267 DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5268 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5269 }
5270
5271 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5272 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5273
5274 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5275 // the full D-register by loading the same value to both lanes. The
5276 // instruction is micro-coded with 2 uops, so don't do this until we can
5277 // properly schedule micro-coded instructions. The dispatcher stalls cause
5278 // too big regressions.
5279
5280 // Insert the dependency-breaking FCONSTD before MI.
5281 // 96 is the encoding of 0.5, but the actual value doesn't matter here.
5282 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
5283 .addImm(96)
5284 .add(predOps(ARMCC::AL));
5285 MI.addRegisterKilled(DReg, TRI, true);
5286}
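Taken together, getPartialRegUpdateClearance and breakPartialRegDependency tell a dependency-breaking pass when a recent write to the wide D-register would make a partial S-register write stall, and how to sever that chain with a cheap full-register def (FCONSTD). The standalone sketch below models only that decision; the Inst record and the clearance value are made up for this note and are not the LLVM types or the actual pass.

#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

// Made-up instruction record, for illustration only (not MachineInstr).
struct Inst {
  std::string Text;
  bool PartialDWrite; // writes only an S sub-register of a D register (VLDRS)
  bool WritesDReg;    // writes the D register that partial write merges into
};

// Return true if a dependency-breaking full def (FCONSTD) should be inserted
// before Insts[Idx]: the D register was written within the last `Clearance`
// instructions, so the partial write would have to wait for that result.
bool shouldBreakDependency(const std::vector<Inst> &Insts, size_t Idx,
                           unsigned Clearance) {
  if (!Insts[Idx].PartialDWrite)
    return false;
  size_t Scanned = 0;
  for (size_t I = Idx; I > 0 && Scanned < Clearance; ++Scanned) {
    --I;
    if (Insts[I].WritesDReg)
      return true; // too recent: break the false dependency
  }
  return false; // enough clearance, the old value is long since available
}

int main() {
  std::vector<Inst> Insts = {
      {"vdiv.f64 d0, d1, d2", false, true}, // slow producer of d0
      {"add r0, r0, #4", false, false},
      {"vldr s0, [r0]", true, false},       // partial update of d0
  };
  std::printf("insert FCONSTD before the vldr? %s\n",
              shouldBreakDependency(Insts, 2, /*Clearance=*/12) ? "yes" : "no");
}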
5287
5288bool ARMBaseInstrInfo::hasNOP() const {
5289 return Subtarget.hasFeature(ARM::HasV6KOps);
5290}
5291
5292bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
5293 if (MI->getNumOperands() < 4)
5294 return true;
5295 unsigned ShOpVal = MI->getOperand(3).getImm();
5296 unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
5297 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5298 if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
5299 ((ShImm == 1 || ShImm == 2) &&
5300 ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5301 return true;
5302
5303 return false;
5304}
5305
5306bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
5307 const MachineInstr &MI, unsigned DefIdx,
5308 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5309 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5310 assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5311
5312 switch (MI.getOpcode()) {
5313 case ARM::VMOVDRR:
5314 // dX = VMOVDRR rY, rZ
5315 // is the same as:
5316 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5317 // Populate the InputRegs accordingly.
5318 // rY
5319 const MachineOperand *MOReg = &MI.getOperand(1);
5320 if (!MOReg->isUndef())
5321 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5322 MOReg->getSubReg(), ARM::ssub_0));
5323 // rZ
5324 MOReg = &MI.getOperand(2);
5325 if (!MOReg->isUndef())
5326 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5327 MOReg->getSubReg(), ARM::ssub_1));
5328 return true;
5329 }
5330 llvm_unreachable("Target dependent opcode missing");
5331}
5332
5333bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5334 const MachineInstr &MI, unsigned DefIdx,
5335 RegSubRegPairAndIdx &InputReg) const {
5336 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5337 assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5338
5339 switch (MI.getOpcode()) {
5340 case ARM::VMOVRRD:
5341 // rX, rY = VMOVRRD dZ
5342 // is the same as:
5343 // rX = EXTRACT_SUBREG dZ, ssub_0
5344 // rY = EXTRACT_SUBREG dZ, ssub_1
5345 const MachineOperand &MOReg = MI.getOperand(2);
5346 if (MOReg.isUndef())
5347 return false;
5348 InputReg.Reg = MOReg.getReg();
5349 InputReg.SubReg = MOReg.getSubReg();
5350 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5351 return true;
5352 }
5353 llvm_unreachable("Target dependent opcode missing");
5354}
5355
5356bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5357 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5358 RegSubRegPairAndIdx &InsertedReg) const {
5359 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5360 assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5361
5362 switch (MI.getOpcode()) {
5363 case ARM::VSETLNi32:
5364 case ARM::MVE_VMOV_to_lane_32:
5365 // dX = VSETLNi32 dY, rZ, imm
5366 // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
5367 const MachineOperand &MOBaseReg = MI.getOperand(1);
5368 const MachineOperand &MOInsertedReg = MI.getOperand(2);
5369 if (MOInsertedReg.isUndef())
5370 return false;
5371 const MachineOperand &MOIndex = MI.getOperand(3);
5372 BaseReg.Reg = MOBaseReg.getReg();
5373 BaseReg.SubReg = MOBaseReg.getSubReg();
5374
5375 InsertedReg.Reg = MOInsertedReg.getReg();
5376 InsertedReg.SubReg = MOInsertedReg.getSubReg();
5377 InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
5378 return true;
5379 }
5380 llvm_unreachable("Target dependent opcode missing");
5381}
5382
5383std::pair<unsigned, unsigned>
5384ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5385 const unsigned Mask = ARMII::MO_OPTION_MASK;
5386 return std::make_pair(TF & Mask, TF & ~Mask);
5387}
5388
5389ArrayRef<std::pair<unsigned, const char *>>
5390ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5391 using namespace ARMII;
5392
5393 static const std::pair<unsigned, const char *> TargetFlags[] = {
5394 {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
5395 {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
5396 {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5397 };
5398 return ArrayRef(TargetFlags);
5399}
5400
5401ArrayRef<std::pair<unsigned, const char *>>
5402ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5403 using namespace ARMII;
5404
5405 static const std::pair<unsigned, const char *> TargetFlags[] = {
5406 {MO_COFFSTUB, "arm-coffstub"},
5407 {MO_GOT, "arm-got"},
5408 {MO_SBREL, "arm-sbrel"},
5409 {MO_DLLIMPORT, "arm-dllimport"},
5410 {MO_SECREL, "arm-secrel"},
5411 {MO_NONLAZY, "arm-nonlazy"}};
5412 return ArrayRef(TargetFlags);
5413}
5414
5415std::optional<RegImmPair>
5416ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
5417 int Sign = 1;
5418 unsigned Opcode = MI.getOpcode();
5419 int64_t Offset = 0;
5420
5421 // TODO: Handle cases where Reg is a super- or sub-register of the
5422 // destination register.
5423 const MachineOperand &Op0 = MI.getOperand(0);
5424 if (!Op0.isReg() || Reg != Op0.getReg())
5425 return std::nullopt;
5426
5427 // We describe SUBri or ADDri instructions.
5428 if (Opcode == ARM::SUBri)
5429 Sign = -1;
5430 else if (Opcode != ARM::ADDri)
5431 return std::nullopt;
5432
5433 // TODO: Third operand can be global address (usually some string). Since
5434 // strings can be relocated we cannot calculate their offsets for
5435 // now.
5436 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
5437 return std::nullopt;
5438
5439 Offset = MI.getOperand(2).getImm() * Sign;
5440 return RegImmPair{MI.getOperand(1).getReg(), Offset};
5441}
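isAddImmediate gives generic consumers (debug-value tracking, for example) a "base register plus signed constant" description of an add/sub-immediate. A minimal standalone model of that contract, using a made-up AddSubImm record instead of MachineInstr, might look like this:

#include <cstdint>
#include <cstdio>
#include <optional>
#include <string>

// Made-up stand-ins for the MI-based types; only the shape of the answer
// (base register plus signed offset) mirrors RegImmPair above.
struct AddSubImm {
  std::string Dest, Base;
  int64_t Imm;
  bool IsSub;
};
struct RegImm {
  std::string Reg;
  int64_t Offset;
};

std::optional<RegImm> describeAddImmediate(const AddSubImm &I,
                                           const std::string &Reg) {
  if (I.Dest != Reg) // only the defined register can be described
    return std::nullopt;
  return RegImm{I.Base, I.IsSub ? -I.Imm : I.Imm};
}

int main() {
  AddSubImm Sub{"r0", "sp", 16, /*IsSub=*/true}; // models SUBri r0, sp, #16
  if (auto RI = describeAddImmediate(Sub, "r0"))
    std::printf("r0 = %s %+lld\n", RI->Reg.c_str(), (long long)RI->Offset);
}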
5442
5443bool llvm::registerDefinedBetween(Register Reg,
5444 MachineBasicBlock::iterator From,
5445 MachineBasicBlock::iterator To,
5446 const TargetRegisterInfo *TRI) {
5447 for (auto I = From; I != To; ++I)
5448 if (I->modifiesRegister(Reg, TRI))
5449 return true;
5450 return false;
5451}
5452
5453MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
5454 const TargetRegisterInfo *TRI) {
5455 // Search backwards to the instruction that defines CPSR. This may or may
5456 // not be a CMP; we check that after this loop. If we find another
5457 // instruction that reads CPSR, we return nullptr.
5458 MachineBasicBlock::iterator CmpMI = Br;
5459 while (CmpMI != Br->getParent()->begin()) {
5460 --CmpMI;
5461 if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
5462 break;
5463 if (CmpMI->readsRegister(ARM::CPSR, TRI))
5464 break;
5465 }
5466
5467 // Check that this inst is a CMP r[0-7], #0 and that the register
5468 // is not redefined between the cmp and the br.
5469 if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5470 return nullptr;
5471 Register Reg = CmpMI->getOperand(0).getReg();
5472 Register PredReg;
5473 ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
5474 if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
5475 return nullptr;
5476 if (!isARMLowRegister(Reg))
5477 return nullptr;
5478 if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
5479 return nullptr;
5480
5481 return &*CmpMI;
5482}
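findCMPToFoldIntoCBZ supports forming Thumb CBZ/CBNZ: it walks backwards from a conditional branch to the instruction that last touched the flags and accepts it only if it compares a low register against zero and that register survives unmodified up to the branch. A self-contained sketch of the same scan over simplified records (SimpleMI is invented for this illustration, not a LLVM type):

#include <cstddef>
#include <cstdio>
#include <vector>

struct SimpleMI {
  bool WritesFlags = false, ReadsFlags = false;
  bool IsCmpZeroLowReg = false; // tCMPi8/t2CMPri rN, #0 with rN in r0-r7
  bool RedefinesCmpReg = false; // clobbers rN
};

const SimpleMI *findFoldableCompare(const std::vector<SimpleMI> &Block,
                                    size_t BrIdx) {
  size_t I = BrIdx;
  while (I > 0) {
    --I;
    if (Block[I].WritesFlags || Block[I].ReadsFlags)
      break; // found the instruction the branch condition depends on
  }
  if (!Block[I].IsCmpZeroLowReg)
    return nullptr;
  for (size_t J = I + 1; J < BrIdx; ++J)
    if (Block[J].RedefinesCmpReg)
      return nullptr; // compared register redefined before the branch
  return &Block[I];
}

int main() {
  std::vector<SimpleMI> Block(3);
  Block[0].WritesFlags = Block[0].IsCmpZeroLowReg = true; // cmp r3, #0
  /* Block[1]: add r1, r2, r4 - touches neither flags nor r3 */
  Block[2].ReadsFlags = true;                             // beq ...
  std::printf("foldable into CBZ: %s\n",
              findFoldableCompare(Block, 2) ? "yes" : "no");
}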
5483
5484unsigned llvm::ConstantMaterializationCost(unsigned Val,
5485 const ARMSubtarget *Subtarget,
5486 bool ForCodesize) {
5487 if (Subtarget->isThumb()) {
5488 if (Val <= 255) // MOV
5489 return ForCodesize ? 2 : 1;
5490 if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
5491 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
5492 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
5493 return ForCodesize ? 4 : 1;
5494 if (Val <= 510) // MOV + ADDi8
5495 return ForCodesize ? 4 : 2;
5496 if (~Val <= 255) // MOV + MVN
5497 return ForCodesize ? 4 : 2;
5498 if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
5499 return ForCodesize ? 4 : 2;
5500 } else {
5501 if (ARM_AM::getSOImmVal(Val) != -1) // MOV
5502 return ForCodesize ? 4 : 1;
5503 if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
5504 return ForCodesize ? 4 : 1;
5505 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
5506 return ForCodesize ? 4 : 1;
5507 if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
5508 return ForCodesize ? 8 : 2;
5509 if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
5510 return ForCodesize ? 8 : 2;
5511 }
5512 if (Subtarget->useMovt()) // MOVW + MOVT
5513 return ForCodesize ? 8 : 2;
5514 return ForCodesize ? 8 : 3; // Literal pool load
5515}
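ConstantMaterializationCost returns either a size in bytes (ForCodesize) or an instruction count. As a rough worked example, the sketch below reproduces only three rungs of the Thumb-2 ladder above (MOVS, MOVW, MOVW+MOVT) and deliberately ignores the modified-immediate and two-part forms, so it is an approximation rather than the real function:

#include <cstdint>
#include <cstdio>

// Approximation only: covers MOVS (imm8), MOVW (imm16) and MOVW+MOVT.
unsigned approxThumb2MaterializationCost(uint32_t Val, bool ForCodesize) {
  if (Val <= 255)             // MOVS rd, #imm8 (16-bit encoding)
    return ForCodesize ? 2 : 1;
  if (Val <= 0xffff)          // MOVW rd, #imm16
    return ForCodesize ? 4 : 1;
  return ForCodesize ? 8 : 2; // MOVW + MOVT
}

int main() {
  std::printf("42:         %u bytes / %u instrs\n",
              approxThumb2MaterializationCost(42, true),
              approxThumb2MaterializationCost(42, false));
  std::printf("0x12345678: %u bytes / %u instrs\n",
              approxThumb2MaterializationCost(0x12345678u, true),
              approxThumb2MaterializationCost(0x12345678u, false));
}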
5516
5517bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5518 const ARMSubtarget *Subtarget,
5519 bool ForCodesize) {
5520 // Check with ForCodesize
5521 unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
5522 unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
5523 if (Cost1 < Cost2)
5524 return true;
5525 if (Cost1 > Cost2)
5526 return false;
5527
5528 // If they are equal, try with !ForCodesize
5529 return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
5530 ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
5531}
5532
5533/// Constants defining how certain sequences should be outlined.
5534/// This encompasses how an outlined function should be called, and what kind of
5535/// frame should be emitted for that outlined function.
5536///
5537/// \p MachineOutlinerTailCall implies that the function is being created from
5538/// a sequence of instructions ending in a return.
5539///
5540/// That is,
5541///
5542/// I1 OUTLINED_FUNCTION:
5543/// I2 --> B OUTLINED_FUNCTION I1
5544/// BX LR I2
5545/// BX LR
5546///
5547/// +-------------------------+--------+-----+
5548/// | | Thumb2 | ARM |
5549/// +-------------------------+--------+-----+
5550/// | Call overhead in Bytes | 4 | 4 |
5551/// | Frame overhead in Bytes | 0 | 0 |
5552/// | Stack fixup required | No | No |
5553/// +-------------------------+--------+-----+
5554///
5555/// \p MachineOutlinerThunk implies that the function is being created from
5556/// a sequence of instructions ending in a call. The outlined function is
5557/// called with a BL instruction, and the outlined function tail-calls the
5558/// original call destination.
5559///
5560/// That is,
5561///
5562/// I1 OUTLINED_FUNCTION:
5563/// I2 --> BL OUTLINED_FUNCTION I1
5564/// BL f I2
5565/// B f
5566///
5567/// +-------------------------+--------+-----+
5568/// | | Thumb2 | ARM |
5569/// +-------------------------+--------+-----+
5570/// | Call overhead in Bytes | 4 | 4 |
5571/// | Frame overhead in Bytes | 0 | 0 |
5572/// | Stack fixup required | No | No |
5573/// +-------------------------+--------+-----+
5574///
5575/// \p MachineOutlinerNoLRSave implies that the function should be called using
5576/// a BL instruction, but doesn't require LR to be saved and restored. This
5577/// happens when LR is known to be dead.
5578///
5579/// That is,
5580///
5581/// I1 OUTLINED_FUNCTION:
5582/// I2 --> BL OUTLINED_FUNCTION I1
5583/// I3 I2
5584/// I3
5585/// BX LR
5586///
5587/// +-------------------------+--------+-----+
5588/// | | Thumb2 | ARM |
5589/// +-------------------------+--------+-----+
5590/// | Call overhead in Bytes | 4 | 4 |
5591/// | Frame overhead in Bytes | 2 | 4 |
5592/// | Stack fixup required | No | No |
5593/// +-------------------------+--------+-----+
5594///
5595/// \p MachineOutlinerRegSave implies that the function should be called with a
5596/// save and restore of LR to an available register. This allows us to avoid
5597/// stack fixups. Note that this outlining variant is compatible with the
5598/// NoLRSave case.
5599///
5600/// That is,
5601///
5602/// I1 Save LR OUTLINED_FUNCTION:
5603/// I2 --> BL OUTLINED_FUNCTION I1
5604/// I3 Restore LR I2
5605/// I3
5606/// BX LR
5607///
5608/// +-------------------------+--------+-----+
5609/// | | Thumb2 | ARM |
5610/// +-------------------------+--------+-----+
5611/// | Call overhead in Bytes | 8 | 12 |
5612/// | Frame overhead in Bytes | 2 | 4 |
5613/// | Stack fixup required | No | No |
5614/// +-------------------------+--------+-----+
5615///
5616/// \p MachineOutlinerDefault implies that the function should be called with
5617/// a save and restore of LR to the stack.
5618///
5619/// That is,
5620///
5621/// I1 Save LR OUTLINED_FUNCTION:
5622/// I2 --> BL OUTLINED_FUNCTION I1
5623/// I3 Restore LR I2
5624/// I3
5625/// BX LR
5626///
5627/// +-------------------------+--------+-----+
5628/// | | Thumb2 | ARM |
5629/// +-------------------------+--------+-----+
5630/// | Call overhead in Bytes | 8 | 12 |
5631/// | Frame overhead in Bytes | 2 | 4 |
5632/// | Stack fixup required | Yes | Yes |
5633/// +-------------------------+--------+-----+
5634
5635enum MachineOutlinerClass {
5636 MachineOutlinerTailCall,
5637 MachineOutlinerThunk,
5638 MachineOutlinerNoLRSave,
5639 MachineOutlinerRegSave,
5640 MachineOutlinerDefault
5641};
5642
5643enum MachineOutlinerMBBFlags {
5644 LRUnavailableSomewhere = 0x2,
5645 HasCalls = 0x4,
5646 UnsafeRegsDead = 0x8
5647};
5648
5649struct OutlinerCosts {
5650 int CallTailCall;
5651 int FrameTailCall;
5652 int CallThunk;
5653 int FrameThunk;
5654 int CallNoLRSave;
5655 int FrameNoLRSave;
5656 int CallRegSave;
5657 int FrameRegSave;
5658 int CallDefault;
5659 int FrameDefault;
5660 int SaveRestoreLROnStack;
5661
5662 OutlinerCosts(const ARMSubtarget &target)
5663 : CallTailCall(target.isThumb() ? 4 : 4),
5664 FrameTailCall(target.isThumb() ? 0 : 0),
5665 CallThunk(target.isThumb() ? 4 : 4),
5666 FrameThunk(target.isThumb() ? 0 : 0),
5667 CallNoLRSave(target.isThumb() ? 4 : 4),
5668 FrameNoLRSave(target.isThumb() ? 2 : 4),
5669 CallRegSave(target.isThumb() ? 8 : 12),
5670 FrameRegSave(target.isThumb() ? 2 : 4),
5671 CallDefault(target.isThumb() ? 8 : 12),
5672 FrameDefault(target.isThumb() ? 2 : 4),
5673 SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
5674};
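The cost table above feeds the generic MachineOutliner's size accounting. The following is a rough, hedged model of that trade-off (the exact bookkeeping lives in the generic outliner, not in this file): N inlined copies cost N * SequenceSize bytes, while outlining costs N calls plus one copy of the body plus its frame overhead.

#include <cstdio>

unsigned outliningBenefit(unsigned NumCandidates, unsigned SequenceSize,
                          unsigned CallOverhead, unsigned FrameOverhead) {
  unsigned NotOutlined = NumCandidates * SequenceSize;
  unsigned Outlined =
      NumCandidates * CallOverhead + SequenceSize + FrameOverhead;
  return NotOutlined > Outlined ? NotOutlined - Outlined : 0;
}

int main() {
  // Thumb2 MachineOutlinerDefault numbers from the table above:
  // 8 bytes per call, 2 bytes of frame overhead.
  std::printf("benefit = %u bytes\n",
              outliningBenefit(/*NumCandidates=*/4, /*SequenceSize=*/12,
                               /*CallOverhead=*/8, /*FrameOverhead=*/2));
}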
5675
5676Register
5677ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
5678 MachineFunction *MF = C.getMF();
5679 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
5680 const ARMBaseRegisterInfo *ARI =
5681 static_cast<const ARMBaseRegisterInfo *>(&TRI);
5682
5683 BitVector regsReserved = ARI->getReservedRegs(*MF);
5684 // Check if there is an available register across the sequence that we can
5685 // use.
5686 for (Register Reg : ARM::rGPRRegClass) {
5687 if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
5688 Reg != ARM::LR && // LR is not reserved, but don't use it.
5689 Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
5690 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
5691 C.isAvailableInsideSeq(Reg, TRI))
5692 return Reg;
5693 }
5694 return Register();
5695}
5696
5697// Compute liveness of LR at the point after the interval [I, E), which
5698// denotes a *backward* iteration through instructions. Used only for return
5699// basic blocks, which do not end with a tail call.
5700static bool isLRAvailable(const TargetRegisterInfo &TRI,
5701 MachineBasicBlock::reverse_iterator I,
5702 MachineBasicBlock::reverse_iterator E) {
5703 // At the end of the function LR is dead.
5704 bool Live = false;
5705 for (; I != E; ++I) {
5706 const MachineInstr &MI = *I;
5707
5708 // Check defs of LR.
5709 if (MI.modifiesRegister(ARM::LR, &TRI))
5710 Live = false;
5711
5712 // Check uses of LR.
5713 unsigned Opcode = MI.getOpcode();
5714 if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
5715 Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
5716 Opcode == ARM::tBXNS_RET) {
5717 // These instructions use LR, but it's not an (explicit or implicit)
5718 // operand.
5719 Live = true;
5720 continue;
5721 }
5722 if (MI.readsRegister(ARM::LR, &TRI))
5723 Live = true;
5724 }
5725 return !Live;
5726}
5727
5728std::optional<std::unique_ptr<outliner::OutlinedFunction>>
5729ARMBaseInstrInfo::getOutliningCandidateInfo(
5730 const MachineModuleInfo &MMI,
5731 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
5732 unsigned MinRepeats) const {
5733 unsigned SequenceSize = 0;
5734 for (auto &MI : RepeatedSequenceLocs[0])
5735 SequenceSize += getInstSizeInBytes(MI);
5736
5737 // Properties about candidate MBBs that hold for all of them.
5738 unsigned FlagsSetInAll = 0xF;
5739
5740 // Compute liveness information for each candidate, and set FlagsSetInAll.
5741 const TargetRegisterInfo &TRI = getRegisterInfo();
5742 for (outliner::Candidate &C : RepeatedSequenceLocs)
5743 FlagsSetInAll &= C.Flags;
5744
5745 // According to the ARM Procedure Call Standard, the following are
5746 // undefined on entry/exit from a function call:
5747 //
5748 // * Register R12(IP),
5749 // * Condition codes (and thus the CPSR register)
5750 //
5751 // Since we control the instructions which are part of the outlined regions
5752 // we don't need to be fully compliant with the AAPCS, but we have to
5753 // guarantee that if a veneer is inserted at link time the code is still
5754 // correct. Because of this, we can't outline any sequence of instructions
5755 // where one of these registers is live into/across it. Thus, we need to
5756 // delete those candidates.
5757 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5758 // If the unsafe registers in this block are all dead, then we don't need
5759 // to compute liveness here.
5760 if (C.Flags & UnsafeRegsDead)
5761 return false;
5762 return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
5763 };
5764
5765 // Are there any candidates where those registers are live?
5766 if (!(FlagsSetInAll & UnsafeRegsDead)) {
5767 // Erase every candidate that violates the restrictions above. (It could be
5768 // true that we have viable candidates, so it's not worth bailing out in
5769 // the case that, say, 1 out of 20 candidates violates the restrictions.)
5770 llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
5771
5772 // If the sequence doesn't have enough candidates left, then we're done.
5773 if (RepeatedSequenceLocs.size() < MinRepeats)
5774 return std::nullopt;
5775 }
5776
5777 // We expect the majority of the outlining candidates to be in consensus with
5778 // regard to return address sign and authentication, and branch target
5779 // enforcement, in other words, partitioning according to all the four
5780 // possible combinations of PAC-RET and BTI is going to yield one big subset
5781 // and three small (likely empty) subsets. That allows us to cull incompatible
5782 // candidates separately for PAC-RET and BTI.
5783
5784 // Partition the candidates in two sets: one with BTI enabled and one with BTI
5785 // disabled. Remove the candidates from the smaller set. If they are the same
5786 // number prefer the non-BTI ones for outlining, since they have less
5787 // overhead.
5788 auto NoBTI =
5789 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5790 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5791 return AFI.branchTargetEnforcement();
5792 });
5793 if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
5794 std::distance(NoBTI, RepeatedSequenceLocs.end()))
5795 RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
5796 else
5797 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
5798
5799 if (RepeatedSequenceLocs.size() < MinRepeats)
5800 return std::nullopt;
5801
5802 // Likewise, partition the candidates according to PAC-RET enablement.
5803 auto NoPAC =
5804 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5805 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5806 // If the function happens to not spill the LR, do not disqualify it
5807 // from the outlining.
5808 return AFI.shouldSignReturnAddress(true);
5809 });
5810 if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
5811 std::distance(NoPAC, RepeatedSequenceLocs.end()))
5812 RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
5813 else
5814 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
5815
5816 if (RepeatedSequenceLocs.size() < MinRepeats)
5817 return std::nullopt;
5818
5819 // At this point, we have only "safe" candidates to outline. Figure out
5820 // frame + call instruction information.
5821
5822 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
5823
5824 // Helper lambda which sets call information for every candidate.
5825 auto SetCandidateCallInfo =
5826 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5827 for (outliner::Candidate &C : RepeatedSequenceLocs)
5828 C.setCallInfo(CallID, NumBytesForCall);
5829 };
5830
5831 OutlinerCosts Costs(Subtarget);
5832
5833 const auto &SomeMFI =
5834 *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
5835 // Adjust costs to account for the BTI instructions.
5836 if (SomeMFI.branchTargetEnforcement()) {
5837 Costs.FrameDefault += 4;
5838 Costs.FrameNoLRSave += 4;
5839 Costs.FrameRegSave += 4;
5840 Costs.FrameTailCall += 4;
5841 Costs.FrameThunk += 4;
5842 }
5843
5844 // Adjust costs to account for sign and authentication instructions.
5845 if (SomeMFI.shouldSignReturnAddress(true)) {
5846 Costs.CallDefault += 8; // +PAC instr, +AUT instr
5847 Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
5848 }
5849
5850 unsigned FrameID = MachineOutlinerDefault;
5851 unsigned NumBytesToCreateFrame = Costs.FrameDefault;
5852
5853 // If the last instruction in any candidate is a terminator, then we should
5854 // tail call all of the candidates.
5855 if (RepeatedSequenceLocs[0].back().isTerminator()) {
5856 FrameID = MachineOutlinerTailCall;
5857 NumBytesToCreateFrame = Costs.FrameTailCall;
5858 SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
5859 } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
5860 LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
5861 LastInstrOpcode == ARM::tBLXr ||
5862 LastInstrOpcode == ARM::tBLXr_noip ||
5863 LastInstrOpcode == ARM::tBLXi) {
5864 FrameID = MachineOutlinerThunk;
5865 NumBytesToCreateFrame = Costs.FrameThunk;
5866 SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
5867 } else {
5868 // We need to decide how to emit calls + frames. We can always emit the same
5869 // frame if we don't need to save to the stack. If we have to save to the
5870 // stack, then we need a different frame.
5871 unsigned NumBytesNoStackCalls = 0;
5872 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5873
5874 for (outliner::Candidate &C : RepeatedSequenceLocs) {
5875 // LR liveness is overestimated in return blocks, unless they end with a
5876 // tail call.
5877 const auto Last = C.getMBB()->rbegin();
5878 const bool LRIsAvailable =
5879 C.getMBB()->isReturnBlock() && !Last->isCall()
5880 ? isLRAvailable(TRI, Last,
5881 (MachineBasicBlock::reverse_iterator)C.begin())
5882 : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
5883 if (LRIsAvailable) {
5884 FrameID = MachineOutlinerNoLRSave;
5885 NumBytesNoStackCalls += Costs.CallNoLRSave;
5886 C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
5887 CandidatesWithoutStackFixups.push_back(C);
5888 }
5889
5890 // Is an unused register available? If so, we won't modify the stack, so
5891 // we can outline with the same frame type as those that don't save LR.
5892 else if (findRegisterToSaveLRTo(C)) {
5893 FrameID = MachineOutlinerRegSave;
5894 NumBytesNoStackCalls += Costs.CallRegSave;
5895 C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
5896 CandidatesWithoutStackFixups.push_back(C);
5897 }
5898
5899 // Is SP used in the sequence at all? If not, we don't have to modify
5900 // the stack, so we are guaranteed to get the same frame.
5901 else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
5902 NumBytesNoStackCalls += Costs.CallDefault;
5903 C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
5904 CandidatesWithoutStackFixups.push_back(C);
5905 }
5906
5907 // If we outline this, we need to modify the stack. Pretend we don't
5908 // outline this by saving all of its bytes.
5909 else
5910 NumBytesNoStackCalls += SequenceSize;
5911 }
5912
5913 // If there are no places where we have to save LR, then note that we don't
5914 // have to update the stack. Otherwise, give every candidate the default
5915 // call type
5916 if (NumBytesNoStackCalls <=
5917 RepeatedSequenceLocs.size() * Costs.CallDefault) {
5918 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5919 FrameID = MachineOutlinerNoLRSave;
5920 if (RepeatedSequenceLocs.size() < MinRepeats)
5921 return std::nullopt;
5922 } else
5923 SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
5924 }
5925
5926 // Does every candidate's MBB contain a call? If so, then we might have a
5927 // call in the range.
5928 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5929 // check if the range contains a call. These require a save + restore of
5930 // the link register.
5931 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
5932 if (any_of(drop_end(FirstCand),
5933 [](const MachineInstr &MI) { return MI.isCall(); }))
5934 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
5935
5936 // Handle the last instruction separately. If it is tail call, then the
5937 // last instruction is a call, we don't want to save + restore in this
5938 // case. However, it could be possible that the last instruction is a
5939 // call without it being valid to tail call this sequence. We should
5940 // consider this as well.
5941 else if (FrameID != MachineOutlinerThunk &&
5942 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
5943 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
5944 }
5945
5946 return std::make_unique<outliner::OutlinedFunction>(
5947 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
5948}
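getOutliningCandidateInfo effectively runs a decision ladder per candidate. The sketch below condenses that ladder into a pure function over a few booleans purely for illustration; the real code above also aggregates per-candidate costs before committing to a frame kind.

#include <cstdio>

enum class OutlineFrame { TailCall, Thunk, NoLRSave, RegSave, Default };

OutlineFrame classifyCandidate(bool EndsInTerminator, bool EndsInCall,
                               bool LRIsAvailable, bool HaveFreeRegister,
                               bool SPUnusedInside) {
  if (EndsInTerminator)
    return OutlineFrame::TailCall;
  if (EndsInCall)
    return OutlineFrame::Thunk;
  if (LRIsAvailable)
    return OutlineFrame::NoLRSave;
  if (HaveFreeRegister)
    return OutlineFrame::RegSave;
  if (SPUnusedInside)
    return OutlineFrame::Default; // save/restore LR on the stack, no fixups
  return OutlineFrame::Default;   // also needs stack-offset fixups
}

int main() {
  auto F = classifyCandidate(false, false, false, true, true);
  std::printf("frame kind = %d (3 == RegSave)\n", static_cast<int>(F));
}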
5949
5950bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
5951 int64_t Fixup,
5952 bool Updt) const {
5953 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
5954 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
5955 if (SPIdx < 0)
5956 // No SP operand
5957 return true;
5958 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
5959 // If SP is not the base register we can't do much
5960 return false;
5961
5962 // Stack might be involved but addressing mode doesn't handle any offset.
5963 // Note: AddrModeT1_[1|2|4] don't operate on SP
5964 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
5965 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
5966 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
5967 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
5968 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
5969 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
5970 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
5971 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
5972 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
5973 AddrMode == ARMII::AddrModeNone ||
5974 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
5975 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
5976 return false;
5977
5978 unsigned NumOps = MI->getDesc().getNumOperands();
5979 unsigned ImmIdx = NumOps - 3;
5980
5981 const MachineOperand &Offset = MI->getOperand(ImmIdx);
5982 assert(Offset.isImm() && "Is not an immediate");
5983 int64_t OffVal = Offset.getImm();
5984
5985 if (OffVal < 0)
5986 // Don't override data if they are below SP.
5987 return false;
5988
5989 unsigned NumBits = 0;
5990 unsigned Scale = 1;
5991
5992 switch (AddrMode) {
5993 case ARMII::AddrMode3:
5994 if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
5995 return false;
5996 OffVal = ARM_AM::getAM3Offset(OffVal);
5997 NumBits = 8;
5998 break;
5999 case ARMII::AddrMode5:
6000 if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
6001 return false;
6002 OffVal = ARM_AM::getAM5Offset(OffVal);
6003 NumBits = 8;
6004 Scale = 4;
6005 break;
6006 case ARMII::AddrMode5FP16:
6007 if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
6008 return false;
6009 OffVal = ARM_AM::getAM5FP16Offset(OffVal);
6010 NumBits = 8;
6011 Scale = 2;
6012 break;
6013 case ARMII::AddrModeT2_i8pos:
6014 NumBits = 8;
6015 break;
6016 case ARMII::AddrModeT2_i8s4:
6017 // FIXME: Values are already scaled in this addressing mode.
6018 assert((Fixup & 3) == 0 && "Can't encode this offset!");
6019 NumBits = 10;
6020 break;
6021 case ARMII::AddrModeT2_ldrex:
6022 NumBits = 8;
6023 Scale = 4;
6024 break;
6025 case ARMII::AddrModeT2_i12:
6026 case ARMII::AddrMode_i12:
6027 NumBits = 12;
6028 break;
6029 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6030 NumBits = 8;
6031 Scale = 4;
6032 break;
6033 default:
6034 llvm_unreachable("Unsupported addressing mode!");
6035 }
6036 // Make sure the offset is encodable for instructions that scale the
6037 // immediate.
6038 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6039 "Can't encode this offset!");
6040 OffVal += Fixup / Scale;
6041
6042 unsigned Mask = (1 << NumBits) - 1;
6043
6044 if (OffVal <= Mask) {
6045 if (Updt)
6046 MI->getOperand(ImmIdx).setImm(OffVal);
6047 return true;
6048 }
6049
6050 return false;
6051}
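A quick worked example of checkAndUpdateStackOffset: for a Thumb-1 SP-relative load (AddrModeT1_s, 8-bit immediate scaled by 4), pushing LR with 8-byte stack alignment shifts an offset of 12 bytes (imm 3) to 20 bytes (imm 5), which still fits the encoding. The standalone sketch below models just that immediate-range check; the parameters and function name are invented for illustration.

#include <cstdint>
#include <cstdio>
#include <optional>

// Simplified model (assumption): the offset operand holds a non-negative
// immediate that is scaled by `Scale` bytes and encoded in `NumBits` bits.
std::optional<int64_t> foldStackFixup(int64_t Imm, int64_t FixupBytes,
                                      unsigned NumBits, unsigned Scale) {
  if (Imm < 0)
    return std::nullopt; // below SP: leave it alone
  if (FixupBytes % Scale != 0)
    return std::nullopt; // fixup not representable at this scale
  int64_t NewImm = Imm + FixupBytes / Scale;
  int64_t Mask = (int64_t(1) << NumBits) - 1;
  if (NewImm > Mask)
    return std::nullopt; // no longer encodable
  return NewImm;
}

int main() {
  // Thumb-1 SP-relative load (AddrModeT1_s): 8-bit immediate scaled by 4.
  if (auto NewImm = foldStackFixup(/*Imm=*/3, /*FixupBytes=*/8,
                                   /*NumBits=*/8, /*Scale=*/4))
    std::printf("new imm = %lld (%lld bytes from SP)\n", (long long)*NewImm,
                (long long)(*NewImm * 4));
}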
6052
6054 Function &F, std::vector<outliner::Candidate> &Candidates) const {
6055 outliner::Candidate &C = Candidates.front();
6056 // branch-target-enforcement is guaranteed to be consistent between all
6057 // candidates, so we only need to look at one.
6058 const Function &CFn = C.getMF()->getFunction();
6059 if (CFn.hasFnAttribute("branch-target-enforcement"))
6060 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
6061
6062 if (CFn.hasFnAttribute("sign-return-address"))
6063 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
6064
6065 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
6066}
6067
6069 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6070 const Function &F = MF.getFunction();
6071
6072 // Can F be deduplicated by the linker? If it can, don't outline from it.
6073 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6074 return false;
6075
6076 // Don't outline from functions with section markings; the program could
6077 // expect that all the code is in the named section.
6078 // FIXME: Allow outlining from multiple functions with the same section
6079 // marking.
6080 if (F.hasSection())
6081 return false;
6082
6083 // FIXME: Thumb1 outlining is not handled
6085 return false;
6086
6087 // It's safe to outline from MF.
6088 return true;
6089}
6090
6092 unsigned &Flags) const {
6093 // Check if LR is available through all of the MBB. If it's not, then set
6094 // a flag.
6095 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6096 "Suitable Machine Function for outlining must track liveness");
6097
6099
6101 LRU.accumulate(MI);
6102
6103 // Check if each of the unsafe registers are available...
6104 bool R12AvailableInBlock = LRU.available(ARM::R12);
6105 bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
6106
6107 // If all of these are dead (and not live out), we know we don't have to check
6108 // them later.
6109 if (R12AvailableInBlock && CPSRAvailableInBlock)
6110 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6111
6112 // Now, add the live outs to the set.
6113 LRU.addLiveOuts(MBB);
6114
6115 // If any of these registers is available in the MBB, but is also a live out of
6116 // the block, then we know outlining is unsafe.
6117 if (R12AvailableInBlock && !LRU.available(ARM::R12))
6118 return false;
6119 if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
6120 return false;
6121
6122 // Check if there's a call inside this MachineBasicBlock. If there is, then
6123 // set a flag.
6124 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6125 Flags |= MachineOutlinerMBBFlags::HasCalls;
6126
6127 // LR liveness is overestimated in return blocks.
6128
6129 bool LRIsAvailable =
6130 MBB.isReturnBlock() && !MBB.back().isCall()
6131 ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
6132 : LRU.available(ARM::LR);
6133 if (!LRIsAvailable)
6134 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6135
6136 return true;
6137}
6138
6139outliner::InstrType
6140ARMBaseInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
6141 MachineBasicBlock::iterator &MIT,
6142 unsigned Flags) const {
6143 MachineInstr &MI = *MIT;
6144 const TargetRegisterInfo *TRI = &getRegisterInfo();
6145
6146 // PIC instructions contain labels, outlining them would break offset
6147 // computing.
6148 unsigned Opc = MI.getOpcode();
6149 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
6150 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
6151 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
6152 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
6153 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
6154 Opc == ARM::t2MOV_ga_pcrel)
6155 return outliner::InstrType::Illegal;
6156
6157 // Be conservative with ARMv8.1 MVE instructions.
6158 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
6159 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
6160 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
6161 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
6162 Opc == ARM::t2LoopEndDec)
6163 return outliner::InstrType::Illegal;
6164
6165 const MCInstrDesc &MCID = MI.getDesc();
6166 uint64_t MIFlags = MCID.TSFlags;
6167 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
6168 return outliner::InstrType::Illegal;
6169
6170 // Is this a terminator for a basic block?
6171 if (MI.isTerminator())
6172 // TargetInstrInfo::getOutliningType has already filtered out anything
6173 // that would break this, so we can allow it here.
6174 return outliner::InstrType::Legal;
6175
6176 // Don't outline if link register or program counter value are used.
6177 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
6178 return outliner::InstrType::Illegal;
6179
6180 if (MI.isCall()) {
6181 // Get the function associated with the call. Look at each operand and find
6182 // the one that represents the callee and get its name.
6183 const Function *Callee = nullptr;
6184 for (const MachineOperand &MOP : MI.operands()) {
6185 if (MOP.isGlobal()) {
6186 Callee = dyn_cast<Function>(MOP.getGlobal());
6187 break;
6188 }
6189 }
6190
6191 // Don't outline calls to "mcount"-like functions; in particular, Linux
6192 // kernel function tracing relies on it.
6193 if (Callee &&
6194 (Callee->getName() == "\01__gnu_mcount_nc" ||
6195 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
6196 return outliner::InstrType::Illegal;
6197
6198 // If we don't know anything about the callee, assume it depends on the
6199 // stack layout of the caller. In that case, it's only legal to outline
6200 // as a tail-call. Explicitly list the call instructions we know about so
6201 // we don't get unexpected results with call pseudo-instructions.
6202 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6203 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
6204 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
6205 Opc == ARM::tBLXi)
6206 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6207
6208 if (!Callee)
6209 return UnknownCallOutlineType;
6210
6211 // We have a function we have information about. Check if it's something we
6212 // can safely outline.
6213 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
6214
6215 // We don't know what's going on with the callee at all. Don't touch it.
6216 if (!CalleeMF)
6217 return UnknownCallOutlineType;
6218
6219 // Check if we know anything about the callee saves on the function. If we
6220 // don't, then don't touch it, since that implies that we haven't computed
6221 // anything about its stack frame yet.
6222 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6223 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6224 MFI.getNumObjects() > 0)
6225 return UnknownCallOutlineType;
6226
6227 // At this point, we can say that CalleeMF ought to not pass anything on the
6228 // stack. Therefore, we can outline it.
6229 return outliner::InstrType::Legal;
6230 }
6231
6232 // Since calls are handled, don't touch LR or PC
6233 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
6234 return outliner::InstrType::Illegal;
6235
6236 // Does this use the stack?
6237 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
6238 // True if there is no chance that any outlined candidate from this range
6239 // could require stack fixups. That is, both
6240 // * LR is available in the range (No save/restore around call)
6241 // * The range doesn't include calls (No save/restore in outlined frame)
6242 // are true.
6243 // These conditions also ensure correctness of the return address
6244 // authentication - we insert sign and authentication instructions only if
6245 // we save/restore LR on stack, but then this condition ensures that the
6246 // outlined range does not modify the SP, therefore the SP value used for
6247 // signing is the same as the one used for authentication.
6248 // FIXME: This is very restrictive; the flags check the whole block,
6249 // not just the bit we will try to outline.
6250 bool MightNeedStackFixUp =
6251 (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
6252 MachineOutlinerMBBFlags::HasCalls));
6253
6254 if (!MightNeedStackFixUp)
6255 return outliner::InstrType::Legal;
6256
6257 // Any modification of SP will break our code to save/restore LR.
6258 // FIXME: We could handle some instructions which add a constant offset to
6259 // SP, with a bit more work.
6260 if (MI.modifiesRegister(ARM::SP, TRI))
6261 return outliner::InstrType::Illegal;
6262
6263 // At this point, we have a stack instruction that we might need to fix up.
6264 // We'll handle it if it's a load or store.
6265 if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
6266 false))
6267 return outliner::InstrType::Legal;
6268
6269 // We can't fix it up, so don't outline it.
6270 return outliner::InstrType::Illegal;
6271 }
6272
6273 // Be conservative with IT blocks.
6274 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
6275 MI.modifiesRegister(ARM::ITSTATE, TRI))
6276 return outliner::InstrType::Illegal;
6277
6278 // Don't outline CFI instructions.
6279 if (MI.isCFIInstruction())
6280 return outliner::InstrType::Illegal;
6281
6282 return outliner::InstrType::Legal;
6283}
6284
6285void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6286 for (MachineInstr &MI : MBB) {
6287 checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
6288 }
6289}
6290
6291void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
6292 MachineBasicBlock::iterator It, bool CFI,
6293 bool Auth) const {
6294 int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
6295 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
6296 assert(Align >= 8 && Align <= 256);
6297 if (Auth) {
6298 assert(Subtarget.isThumb2());
6299 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
6300 // sequence.
6301 BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
6302 BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
6303 .addReg(ARM::R12, RegState::Kill)
6304 .addReg(ARM::LR, RegState::Kill)
6305 .addReg(ARM::SP)
6306 .addImm(-Align)
6307 .add(predOps(ARMCC::AL))
6308 .setMIFlags(MIFlags);
6309 } else {
6310 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
6311 BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
6312 .addReg(ARM::LR, RegState::Kill)
6313 .addReg(ARM::SP)
6314 .addImm(-Align)
6315 .add(predOps(ARMCC::AL))
6316 .setMIFlags(MIFlags);
6317 }
6318
6319 if (!CFI)
6320 return;
6321
6322 // Add a CFI, saying CFA is offset by Align bytes from SP.
6323 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
6324 CFIBuilder.buildDefCFAOffset(Align);
6325
6326 // Add a CFI saying that the LR that we want to find is now higher than
6327 // before.
6328 int LROffset = Auth ? Align - 4 : Align;
6329 CFIBuilder.buildOffset(ARM::LR, -LROffset);
6330 if (Auth) {
6331 // Add a CFI for the location of the return address PAC.
6332 CFIBuilder.buildOffset(ARM::RA_AUTH_CODE, -Align);
6333 }
6334}
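The CFI emitted by saveLROnStack records where the return address (and, with PAC, the authentication code) now lives relative to the CFA. The sketch below only redoes that arithmetic for the two layouts, assuming the PAC/LR pair is stored with LR at the higher address, as the t2STRD_PRE above does; it is an illustration, not LLVM code.

#include <cstdio>

int main() {
  const int Align = 8;    // bytes reserved by the pre-indexed store
  const bool Auth = true; // return-address signing enabled

  int CFAOffset = Align;                   // CFA = SP + Align after the push
  int LROffset = Auth ? Align - 4 : Align; // saved LR sits LROffset below CFA
  std::printf("CFA = SP+%d, LR at CFA-%d\n", CFAOffset, LROffset);
  if (Auth)
    std::printf("RA_AUTH_CODE (PAC) at CFA-%d\n", Align);
}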
6335
6336void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
6338 bool CFI, bool Auth) const {
6339 int Align = Subtarget.getStackAlignment().value();
6340 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
6341 if (Auth) {
6342 assert(Subtarget.isThumb2());
6343 // Restore return address PAC and LR.
6344 BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
6345 .addReg(ARM::R12, RegState::Define)
6346 .addReg(ARM::LR, RegState::Define)
6347 .addReg(ARM::SP, RegState::Define)
6348 .addReg(ARM::SP)
6349 .addImm(Align)
6350 .add(predOps(ARMCC::AL))
6351 .setMIFlags(MIFlags);
6352 // LR authentication is after the CFI instructions, below.
6353 } else {
6354 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6355 MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
6356 .addReg(ARM::SP, RegState::Define)
6357 .addReg(ARM::SP);
6358 if (!Subtarget.isThumb())
6359 MIB.addReg(0);
6360 MIB.addImm(Subtarget.getStackAlignment().value())
6361 .add(predOps(ARMCC::AL))
6362 .setMIFlags(MIFlags);
6363 }
6364
6365 if (CFI) {
6366 // Now stack has moved back up and we have restored LR.
6367 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameDestroy);
6368 CFIBuilder.buildDefCFAOffset(0);
6369 CFIBuilder.buildRestore(ARM::LR);
6370 if (Auth)
6371 CFIBuilder.buildUndefined(ARM::RA_AUTH_CODE);
6372 }
6373
6374 if (Auth)
6375 BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
6376}
6377
6378void ARMBaseInstrInfo::buildOutlinedFrame(
6379 MachineBasicBlock &MBB, MachineFunction &MF,
6380 const outliner::OutlinedFunction &OF) const {
6381 // For thunk outlining, rewrite the last instruction from a call to a
6382 // tail-call.
6383 if (OF.FrameConstructionID == MachineOutlinerThunk) {
6384 MachineInstr *Call = &*--MBB.instr_end();
6385 bool isThumb = Subtarget.isThumb();
6386 unsigned FuncOp = isThumb ? 2 : 0;
6387 unsigned Opc = Call->getOperand(FuncOp).isReg()
6388 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
6389 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
6390 : ARM::tTAILJMPdND
6391 : ARM::TAILJMPd;
6392 MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
6393 .add(Call->getOperand(FuncOp));
6394 if (isThumb && !Call->getOperand(FuncOp).isReg())
6395 MIB.add(predOps(ARMCC::AL));
6396 Call->eraseFromParent();
6397 }
6398
6399 // Is there a call in the outlined range?
6400 auto IsNonTailCall = [](MachineInstr &MI) {
6401 return MI.isCall() && !MI.isReturn();
6402 };
6403 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
6404 MachineBasicBlock::iterator It = MBB.begin();
6405 MachineBasicBlock::iterator Et = MBB.end();
6406
6407 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6408 OF.FrameConstructionID == MachineOutlinerThunk)
6409 Et = std::prev(MBB.end());
6410
6411 // We have to save and restore LR, we need to add it to the liveins if it
6412 // is not already part of the set. This is sufficient since outlined
6413 // functions only have one block.
6414 if (!MBB.isLiveIn(ARM::LR))
6415 MBB.addLiveIn(ARM::LR);
6416
6417 // Insert a save before the outlined region
6418 bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true);
6419 saveLROnStack(MBB, It, true, Auth);
6420
6421 // Fix up the instructions in the range, since we're going to modify the
6422 // stack.
6423 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6424 "Can only fix up stack references once");
6425 fixupPostOutline(MBB);
6426
6427 // Insert a restore before the terminator for the function. Restore LR.
6428 restoreLRFromStack(MBB, Et, true, Auth);
6429 }
6430
6431 // If this is a tail call outlined function, then there's already a return.
6432 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6433 OF.FrameConstructionID == MachineOutlinerThunk)
6434 return;
6435
6436 // Here we have to insert the return ourselves. Get the correct opcode from
6437 // current feature set.
6438 BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
6439 .add(predOps(ARMCC::AL));
6440
6441 // Did we have to modify the stack by saving the link register?
6442 if (OF.FrameConstructionID != MachineOutlinerDefault &&
6443 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
6444 return;
6445
6446 // We modified the stack.
6447 // Walk over the basic block and fix up all the stack accesses.
6448 fixupPostOutline(MBB);
6449}
6450
6451MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
6452 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6453 MachineFunction &MF, outliner::Candidate &C) const {
6454 MachineInstrBuilder MIB;
6455 MachineBasicBlock::iterator CallPt;
6456 unsigned Opc;
6457 bool isThumb = Subtarget.isThumb();
6458
6459 // Are we tail calling?
6460 if (C.CallConstructionID == MachineOutlinerTailCall) {
6461 // If yes, then we can just branch to the label.
6462 Opc = isThumb
6463 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
6464 : ARM::TAILJMPd;
6465 MIB = BuildMI(MF, DebugLoc(), get(Opc))
6466 .addGlobalAddress(M.getNamedValue(MF.getName()));
6467 if (isThumb)
6468 MIB.add(predOps(ARMCC::AL));
6469 It = MBB.insert(It, MIB);
6470 return It;
6471 }
6472
6473 // Create the call instruction.
6474 Opc = isThumb ? ARM::tBL : ARM::BL;
6475 MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
6476 if (isThumb)
6477 CallMIB.add(predOps(ARMCC::AL));
6478 CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
6479
6480 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6481 C.CallConstructionID == MachineOutlinerThunk) {
6482 // No, so just insert the call.
6483 It = MBB.insert(It, CallMIB);
6484 return It;
6485 }
6486
6487 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6488 // Can we save to a register?
6489 if (C.CallConstructionID == MachineOutlinerRegSave) {
6490 Register Reg = findRegisterToSaveLRTo(C);
6491 assert(Reg != 0 && "No callee-saved register available?");
6492
6493 // Save and restore LR from that register.
6494 copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
6495 if (!AFI.isLRSpilled())
6496 CFIInstBuilder(MBB, It, MachineInstr::FrameSetup)
6497 .buildRegister(ARM::LR, Reg);
6498 CallPt = MBB.insert(It, CallMIB);
6499 copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
6500 if (!AFI.isLRSpilled())
6501 CFIInstBuilder(MBB, It, MachineInstr::FrameDestroy).buildRestore(ARM::LR);
6502 It--;
6503 return CallPt;
6504 }
6505 // We have the default case. Save and restore from SP.
6506 if (!MBB.isLiveIn(ARM::LR))
6507 MBB.addLiveIn(ARM::LR);
6508 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
6509 saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
6510 CallPt = MBB.insert(It, CallMIB);
6511 restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
6512 It--;
6513 return CallPt;
6514}
6515
6516bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
6517 MachineFunction &MF) const {
6518 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
6519}
6520
6521bool ARMBaseInstrInfo::isReMaterializableImpl(
6522 const MachineInstr &MI) const {
6523 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
6524 // the tail predication conversion. This means that the element count
6525 // register has to be live for longer, but that has to be better than
6526 // spill/restore and VPT predication.
6527 return (isVCTP(&MI) && !isPredicated(MI)) ||
6528 TargetInstrInfo::isReMaterializableImpl(MI);
6529}
6530
6531unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
6532 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
6533 : ARM::BLX;
6534}
6535
6536unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
6537 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
6538 : ARM::tBLXr;
6539}
6540
6541unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
6542 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
6543 : ARM::BLX_pred;
6544}
6545
6546namespace {
6547class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
6548 MachineInstr *EndLoop, *LoopCount;
6549 MachineFunction *MF;
6550 const TargetInstrInfo *TII;
6551
6552 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6553 // [LAST_IS_USE] : last reference to register in schedule is a use
6554 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
6555 static int constexpr MAX_STAGES = 30;
6556 static int constexpr LAST_IS_USE = MAX_STAGES;
6557 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
6558 typedef std::bitset<MAX_STAGES + 2> IterNeed;
6559 typedef std::map<Register, IterNeed> IterNeeds;
6560
6561 void bumpCrossIterationPressure(RegPressureTracker &RPT,
6562 const IterNeeds &CIN);
6563 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
6564
6565 // Meanings of the various stuff with loop types:
6566 // t2Bcc:
6567 // EndLoop = branch at end of original BB that will become a kernel
6568 // LoopCount = CC setter live into branch
6569 // t2LoopEnd:
6570 // EndLoop = branch at end of original BB
6571 // LoopCount = t2LoopDec
6572public:
6573 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
6574 : EndLoop(EndLoop), LoopCount(LoopCount),
6575 MF(EndLoop->getParent()->getParent()),
6576 TII(MF->getSubtarget().getInstrInfo()) {}
6577
6578 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
6579 // Only ignore the terminator.
6580 return MI == EndLoop || MI == LoopCount;
6581 }
6582
6583 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
6584 if (tooMuchRegisterPressure(SSD, SMS))
6585 return false;
6586
6587 return true;
6588 }
6589
6590 std::optional<bool> createTripCountGreaterCondition(
6591 int TC, MachineBasicBlock &MBB,
6592 SmallVectorImpl<MachineOperand> &Cond) override {
6593
6594 if (isCondBranchOpcode(EndLoop->getOpcode())) {
6595 Cond.push_back(EndLoop->getOperand(1));
6596 Cond.push_back(EndLoop->getOperand(2));
6597 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
6598 TII->reverseBranchCondition(Cond);
6599 }
6600 return {};
6601 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6602 // General case just lets the unrolled t2LoopDec do the subtraction and
6603 // therefore just needs to check if zero has been reached.
6604 MachineInstr *LoopDec = nullptr;
6605 for (auto &I : MBB.instrs())
6606 if (I.getOpcode() == ARM::t2LoopDec)
6607 LoopDec = &I;
6608 assert(LoopDec && "Unable to find copied LoopDec");
6609 // Check if we're done with the loop.
6610 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
6611 .addReg(LoopDec->getOperand(0).getReg())
6612 .addImm(0)
6613 .addImm(ARMCC::AL)
6614 .addReg(ARM::NoRegister);
6616 Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
6617 return {};
6618 } else
6619 llvm_unreachable("Unknown EndLoop");
6620 }
6621
6622 void setPreheader(MachineBasicBlock *NewPreheader) override {}
6623
6624 void adjustTripCount(int TripCountAdjust) override {}
6625};
6626
6627void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
6628 const IterNeeds &CIN) {
6629 // Increase pressure by the amounts in CrossIterationNeeds
6630 for (const auto &N : CIN) {
6631 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6632 for (int I = 0; I < Cnt; ++I)
6635 }
6636 // Decrease pressure by the amounts in CrossIterationNeeds
6637 for (const auto &N : CIN) {
6638 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6639 for (int I = 0; I < Cnt; ++I)
6642 }
6643}
6644
6645bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
6646 SMSchedule &SMS) {
6647 IterNeeds CrossIterationNeeds;
6648
6649 // Determine which values will be loop-carried after the schedule is
6650 // applied
6651
6652 for (auto &SU : SSD.SUnits) {
6653 const MachineInstr *MI = SU.getInstr();
6654 int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
6655 for (auto &S : SU.Succs)
6656 if (MI->isPHI() && S.getKind() == SDep::Anti) {
6657 Register Reg = S.getReg();
6658 if (Reg.isVirtual())
6659 CrossIterationNeeds[Reg.id()].set(0);
6660 } else if (S.isAssignedRegDep()) {
6661 int OStg = SMS.stageScheduled(S.getSUnit());
6662 if (OStg >= 0 && OStg != Stg) {
6663 Register Reg = S.getReg();
6664 if (Reg.isVirtual())
6665 CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1);
6666 }
6667 }
6668 }
6669
6670 // Determine more-or-less what the proposed schedule (reversed) is going to
6671 // be; it might not be quite the same because the within-cycle ordering
6672 // created by SMSchedule depends upon changes to help with address offsets and
6673 // the like.
6674 std::vector<SUnit *> ProposedSchedule;
6675 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6676 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
6677 ++Stage) {
6678 std::deque<SUnit *> Instrs =
6679 SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
6680 std::sort(Instrs.begin(), Instrs.end(),
6681 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6682 llvm::append_range(ProposedSchedule, Instrs);
6683 }
6684
6685 // Learn whether the last use/def of each cross-iteration register is a use or
6686 // def. If it is a def, RegisterPressure will implicitly increase max pressure
6687 // and we do not have to add the pressure.
6688 for (auto *SU : ProposedSchedule)
6689 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6690 ++OperI) {
6691 auto MO = *OperI;
6692 if (!MO.isReg() || !MO.getReg())
6693 continue;
6694 Register Reg = MO.getReg();
6695 auto CIter = CrossIterationNeeds.find(Reg.id());
6696 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6697 CIter->second[SEEN_AS_LIVE])
6698 continue;
6699 if (MO.isDef() && !MO.isDead())
6700 CIter->second.set(SEEN_AS_LIVE);
6701 else if (MO.isUse())
6702 CIter->second.set(LAST_IS_USE);
6703 }
6704 for (auto &CI : CrossIterationNeeds)
6705 CI.second.reset(LAST_IS_USE);
6706
6707 RegionPressure RecRegPressure;
6708 RegPressureTracker RPTracker(RecRegPressure);
6709 RegisterClassInfo RegClassInfo;
6710 RegClassInfo.runOnMachineFunction(*MF);
6711 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
6712 EndLoop->getParent()->end(), false, false);
6713
6714 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6715
6716 for (auto *SU : ProposedSchedule) {
6717 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6718 RPTracker.setPos(std::next(CurInstI));
6719 RPTracker.recede();
6720
6721 // Track what cross-iteration registers would be seen as live
6722 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
6723 auto MO = *OperI;
6724 if (!MO.isReg() || !MO.getReg())
6725 continue;
6726 Register Reg = MO.getReg();
6727 if (MO.isDef() && !MO.isDead()) {
6728 auto CIter = CrossIterationNeeds.find(Reg.id());
6729 if (CIter != CrossIterationNeeds.end()) {
6730 CIter->second.reset(0);
6731 CIter->second.reset(SEEN_AS_LIVE);
6732 }
6733 }
6734 }
6735 for (auto &S : SU->Preds) {
6736 auto Stg = SMS.stageScheduled(SU);
6737 if (S.isAssignedRegDep()) {
6738 Register Reg = S.getReg();
6739 auto CIter = CrossIterationNeeds.find(Reg.id());
6740 if (CIter != CrossIterationNeeds.end()) {
6741 auto Stg2 = SMS.stageScheduled(S.getSUnit());
6742 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6743 if (Stg - Stg2 < MAX_STAGES)
6744 CIter->second.set(Stg - Stg2);
6745 CIter->second.set(SEEN_AS_LIVE);
6746 }
6747 }
6748 }
6749
6750 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6751 }
6752
6753 auto &P = RPTracker.getPressure().MaxSetPressure;
6754 for (unsigned I = 0, E = P.size(); I < E; ++I) {
6755 // Exclude some Neon register classes.
6756 if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 ||
6757 I == ARM::DTriple_with_qsub_0_in_QPR)
6758 continue;
6759
6760 if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
6761 return true;
6762 }
6763 }
6764 return false;
6765}
6766
6767} // namespace
6768
6769std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
6770ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
6771 MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
6772 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
6773 if (Preheader == LoopBB)
6774 Preheader = *std::next(LoopBB->pred_begin());
6775
6776 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
6777 // If the branch is a Bcc, then the CPSR should be set somewhere within the
6778 // block. We need to determine the reaching definition of CPSR so that
6779 // it can be marked as non-pipelineable, allowing the pipeliner to force
6780 // it into stage 0 or give up if it cannot or will not do so.
6781 MachineInstr *CCSetter = nullptr;
6782 for (auto &L : LoopBB->instrs()) {
6783 if (L.isCall())
6784 return nullptr;
6785 if (isCPSRDefined(L))
6786 CCSetter = &L;
6787 }
6788 if (CCSetter)
6789 return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
6790 else
6791 return nullptr; // Unable to find the CC setter, so unable to guarantee
6792 // that pipeline will work
6793 }
6794
6795 // Recognize:
6796 // preheader:
6797 // %1 = t2DoLoopStart %0
6798 // loop:
6799 // %2 = phi %1, <not loop>, %..., %loop
6800 // %3 = t2LoopDec %2, <imm>
6801 // t2LoopEnd %3, %loop
6802
6803 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
6804 for (auto &L : LoopBB->instrs())
6805 if (L.isCall())
6806 return nullptr;
6807 else if (isVCTP(&L))
6808 return nullptr;
6809 Register LoopDecResult = I->getOperand(0).getReg();
6810 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
6811 MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
6812 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
6813 return nullptr;
6814 MachineInstr *LoopStart = nullptr;
6815 for (auto &J : Preheader->instrs())
6816 if (J.getOpcode() == ARM::t2DoLoopStart)
6817 LoopStart = &J;
6818 if (!LoopStart)
6819 return nullptr;
6820 return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
6821 }
6822 return nullptr;
6823}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isThumb(const MCSubtargetInfo &STI)
static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, MachineInstr &MI, MCRegister DReg, unsigned Lane, MCRegister &ImplicitSReg)
getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, set ImplicitSReg to a register n...
static const MachineInstr * getBundledUseMI(const TargetRegisterInfo *TRI, const MachineInstr &MI, unsigned Reg, unsigned &UseIdx, unsigned &Dist)
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI)
Create a copy of a const pool value.
static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg, int CmpMask, bool CommonUse)
isSuitableForMask - Identify a suitable 'and' instruction that operates on the given source register ...
static int adjustDefLatency(const ARMSubtarget &Subtarget, const MachineInstr &DefMI, const MCInstrDesc &DefMCID, unsigned DefAlign)
Return the number of cycles to add to (or subtract from) the static itinerary based on the def opcode...
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, const MachineInstr &MI)
static MCRegister getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, unsigned SReg, unsigned &Lane)
static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[]
static bool isEligibleForITBlock(const MachineInstr *MI)
static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC)
getCmpToAddCondition - assume the flags are set by CMP(a,b), return the condition code if we modify t...
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1)
static bool isLRAvailable(const TargetRegisterInfo &TRI, MachineBasicBlock::reverse_iterator I, MachineBasicBlock::reverse_iterator E)
static const ARM_MLxEntry ARM_MLxTable[]
static bool isRedundantFlagInstr(const MachineInstr *CmpI, Register SrcReg, Register SrcReg2, int64_t ImmValue, const MachineInstr *OI, bool &IsThumb1)
isRedundantFlagInstr - check whether the first instruction, whose only purpose is to update flags,...
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, unsigned NumRegs)
static const MachineInstr * getBundledDefMI(const TargetRegisterInfo *TRI, const MachineInstr *MI, unsigned Reg, unsigned &DefIdx, unsigned &Dist)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DXIL Forward Handle Accesses
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register Reg
const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
unsigned OpIdx
IntrinsicInst * II
#define P(N)
PowerPC TLS Dynamic Call Fixup
TargetInstrInfo::RegSubRegPairAndIdx RegSubRegPairAndIdx
MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file defines the SmallSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
static bool isCPSRDefined(const MachineInstr &MI)
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction to set the zero flag so that we can remove a "comparis...
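As a rough illustration of how this hook pairs with analyzeCompare in a peephole-style caller; CmpMI, MRI, TII and Changed are assumed to exist in the surrounding pass, so this is a sketch rather than the actual pass code.

Register SrcReg, SrcReg2;
int64_t CmpMask, CmpValue;
// Decompose the comparison first, then try to fold it into a flag-setting
// form of the defining arithmetic (e.g. SUB becomes SUBS) so the explicit
// CMP can be deleted.
if (TII->analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
  Changed |= TII->optimizeCompareInstr(CmpMI, SrcReg, SrcReg2, CmpMask,
                                       CmpValue, MRI);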
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
foldImmediate - 'Reg' is known to be defined by a move immediate instruction, try to fold the immedia...
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MCRegister DestReg, bool KillSrc, const ARMSubtarget &Subtarget) const
unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getPartialRegUpdateClearance(const MachineInstr &, unsigned, const TargetRegisterInfo *) const override
unsigned getNumLDMAddresses(const MachineInstr &MI) const
Get the number of addresses loaded by an LDM or VLDM, or zero if unknown.
MachineInstr * optimizeSelect(MachineInstr &MI, SmallPtrSetImpl< MachineInstr * > &SeenMIs, bool) const override
bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI) const override
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Returns the size of the specified MachineInstr.
void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MCRegister SrcReg, bool KillSrc, const ARMSubtarget &Subtarget) const
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
const MachineInstrBuilder & AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, RegState State) const
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
ARM supports the MachineOutliner.
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
Enable outlining by default at -Oz.
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const override
bool isPredicated(const MachineInstr &MI) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, unsigned LoadImmOpc, unsigned LoadOpc) const
bool isPredicable(const MachineInstr &MI) const override
isPredicable - Return true if the specified instruction can be predicated.
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
Specialization of TargetInstrInfo::describeLoadedValue, used to enhance debug entry value description...
std::optional< std::unique_ptr< outliner::OutlinedFunction > > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs, unsigned MinRepeats) const override
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, unsigned NumInsts) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
const ARMBaseRegisterInfo & getRegisterInfo() const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to determine if two loads are lo...
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
bool getRegSequenceLikeInputs(const MachineInstr &MI, unsigned DefIdx, SmallVectorImpl< RegSubRegPairAndIdx > &InputRegs) const override
Build the equivalent inputs of a REG_SEQUENCE for the given MI and DefIdx.
unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override
bool getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const override
Build the equivalent inputs of a INSERT_SUBREG for the given MI and DefIdx.
bool expandPostRAPseudo(MachineInstr &MI) const override
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to determine (in conjunction w...
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
VFP/NEON execution domains.
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isFpMLxInstruction(unsigned Opcode) const
isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS instruction.
bool isSwiftFastImmShift(const MachineInstr *MI) const
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
ARMBaseInstrInfo(const ARMSubtarget &STI, const ARMBaseRegisterInfo &TRI)
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void breakPartialRegDependency(MachineInstr &, unsigned, const TargetRegisterInfo *TRI) const override
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
const ARMSubtarget & getSubtarget() const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool analyzeSelect(const MachineInstr &MI, SmallVectorImpl< MachineOperand > &Cond, unsigned &TrueOp, unsigned &FalseOp, bool &Optimizable) const override
bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPairAndIdx &InputReg) const override
Build the equivalent inputs of a EXTRACT_SUBREG for the given MI and DefIdx.
bool shouldSink(const MachineInstr &MI) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMCP::ARMCPModifier getModifier() const
virtual bool hasSameValue(ARMConstantPoolValue *ACPV)
hasSameValue - Return true if this ARM constpool value can share the same constantpool entry as anoth...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
bool isCortexA7() const
bool isSwift() const
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb1Only() const
bool isThumb2() const
bool isLikeA9() const
Align getStackAlignment() const
getStackAlignment - Returns the minimum alignment known to hold of the stack frame on entry to the fu...
bool enableMachinePipeliner() const override
Returns true if machine pipeliner should be enabled.
bool hasMinSize() const
bool isCortexA8() const
@ DoubleIssueCheckUnalignedAccess
Can load/store 2 registers/cycle, but needs an extra cycle if the access is not 64-bit aligned.
@ SingleIssue
Can load/store 1 register/cycle.
@ DoubleIssue
Can load/store 2 registers/cycle.
@ SingleIssuePlusExtras
Can load/store 1 register/cycle, but needs an extra cycle for address computation and potentially als...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type size() const
size - Returns the number of bits in this bitvector.
Definition BitVector.h:178
LLVM_ABI uint64_t scale(uint64_t Num) const
Scale a large integer.
BranchProbability getCompl() const
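A small, self-contained example of the BranchProbability arithmetic used when weighing predication costs; the 3/4 probability and the 100-cycle figure are made up for illustration.

#include "llvm/Support/BranchProbability.h"
#include <cstdint>

using namespace llvm;

void branchProbabilityExample() {
  BranchProbability Taken = BranchProbability::getBranchProbability(3, 4);
  uint64_t TakenCycles = Taken.scale(100);               // 75
  uint64_t NotTakenCycles = Taken.getCompl().scale(100); // 25
  (void)TakenCycles;
  (void)NotTakenCycles;
}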
Helper class for creating CFI instructions and inserting them into MIR.
void buildRegister(MCRegister Reg1, MCRegister Reg2) const
void buildRestore(MCRegister Reg) const
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
A debug info location.
Definition DebugLoc.h:123
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:75
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:712
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:764
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:709
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
bool hasDLLImportStorageClass() const
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
Itinerary data supplied by a subtarget to be used by a target.
int getNumMicroOps(unsigned ItinClassIndx) const
Return the number of micro-ops that the given class decodes to.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
unsigned getStageLatency(unsigned ItinClassIndx) const
Return the total stage latency of the given class.
std::optional< unsigned > getOperandLatency(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Compute and return the use operand latency of a given itinerary class and operand index if the value ...
bool hasPipelineForwarding(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Return true if there is a pipeline forwarding between instructions of itinerary classes DefClass and ...
bool isEmpty() const
Returns true if there are no itineraries.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
Describe properties that are true of each instruction in the target description file.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayLoad() const
Return true if this instruction could possibly read memory.
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
bool isCall() const
Return true if the instruction is a call.
unsigned short Opcode
unsigned getOpcode() const
Return the opcode number for this descriptor.
LLVM_ABI bool hasImplicitDefOfPhysReg(MCRegister Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool isValid() const
isValid - Returns true until all the operands have been visited.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineInstrBundleIterator< MachineInstr > iterator
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
@ LQR_Dead
Register is known to be fully dead.
@ LQR_Live
Register is known to be (at least partially) live.
@ LQR_Unknown
Register liveness not decidable from local neighborhood.
This class is a data container for one entry in a MachineConstantPool.
union llvm::MachineConstantPoolEntry::(anonymous union) Val
The constant itself.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
MachineConstantPoolValue * MachineCPVal
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
ArrayRef< MachineMemOperand * >::iterator mmo_iterator
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isCall(QueryType Type=AnyInBundle) const
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI int findFirstPredOperandIdx() const
Find the index of the first operand in the operand list that is used to represent the predicate.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool isRegSequence() const
bool isInsertSubreg() const
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
bool hasOptionalDef(QueryType Type=IgnoreBundle) const
Set if this instruction has an optional definition, e.g.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI void clearKillInfo()
Clears kill flags on all operands.
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
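A hedged sketch of the usual regmask query, in the spirit of the liveness checks the outliner performs; the helper name callClobbersLR is hypothetical, and ARM::LR comes from the target's generated register enum that this file already sees.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

using namespace llvm;

// Does this call clobber LR according to its register-mask operand?
static bool callClobbersLR(const MachineInstr &Call) {
  for (const MachineOperand &MO : Call.operands())
    if (MO.isRegMask() &&
        MachineOperand::clobbersPhysReg(MO.getRegMask(), ARM::LR))
      return true;
  return false;
}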
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
const TargetRegisterInfo * getTargetRegisterInfo() const
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void AddHazardRecognizer(std::unique_ptr< ScheduleHazardRecognizer > &&)
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void increaseRegPressure(VirtRegOrUnit VRegOrUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
LLVM_ABI void decreaseRegPressure(VirtRegOrUnit VRegOrUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition Register.h:60
constexpr unsigned id() const
Definition Register.h:100
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
This class represents the scheduled code.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Scheduling unit. This is a node in the scheduling DAG.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
std::vector< SUnit > SUnits
The scheduling units.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
const InstrItineraryData * getInstrItineraries() const
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Wrapper class representing a virtual register or register unit.
Definition Register.h:181
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
static CondCodes getOppositeCondition(CondCodes CC)
Definition ARMBaseInfo.h:48
ARMII - This namespace holds all of the target specific flags that instruction info tracks.
@ ThumbArithFlagSetting
@ MO_OPTION_MASK
MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects just that part of the flag set.
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
AddrMode
ARM Addressing Modes.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned char getAM5FP16Offset(unsigned AM5Opc)
unsigned getSORegOffset(unsigned Op)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
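Two worked examples, with arbitrary values: an ARM shifter-operand immediate is an 8-bit value rotated right by an even amount, and getSOImmVal reports -1 when no such encoding exists.

// 0xFF000000 is 0xFF rotated right by 8, so it encodes.
int Enc1 = ARM_AM::getSOImmVal(0xFF000000u); // >= 0 (encodable)
// 0x00ABCDEF cannot be expressed as 8 bits under any even rotation.
int Enc2 = ARM_AM::getSOImmVal(0x00ABCDEFu); // -1 (not encodable)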
ShiftOpc getAM2ShiftOpc(unsigned AM2Opc)
unsigned getAM2Offset(unsigned AM2Opc)
unsigned getSOImmValRotate(unsigned Imm)
getSOImmValRotate - Try to handle Imm with an immediate shifter operand, computing the rotate amount ...
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
ShiftOpc getSORegShOp(unsigned Op)
AddrOpc getAM5Op(unsigned AM5Opc)
bool isSOImmTwoPartValNeg(unsigned V)
isSOImmTwoPartValNeg - Return true if the specified value can be obtained by two SOImmVal,...
unsigned getSOImmTwoPartSecond(unsigned V)
getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, return the second chunk of ...
bool isSOImmTwoPartVal(unsigned V)
isSOImmTwoPartVal - Return true if the specified value can be obtained by or'ing together two SOImmVa...
AddrOpc getAM5FP16Op(unsigned AM5Opc)
unsigned getT2SOImmTwoPartSecond(unsigned Imm)
unsigned getT2SOImmTwoPartFirst(unsigned Imm)
bool isT2SOImmTwoPartVal(unsigned Imm)
unsigned char getAM5Offset(unsigned AM5Opc)
unsigned getSOImmTwoPartFirst(unsigned V)
getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal, return the first chunk of it...
AddrOpc getAM2Op(unsigned AM2Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
Define some predicates that are used for node matching.
Definition ARMEHABI.h:25
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
InstrType
Represents how an instruction should be mapped by the outliner.
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
constexpr T rotr(T V, int R)
Definition bit.h:382
static bool isIndirectCall(const MachineInstr &MI)
MachineInstr * findCMPToFoldIntoCBZ(MachineInstr *Br, const TargetRegisterInfo *TRI)
Search backwards from a tBcc to find a tCMPi8 against 0, meaning we can convert them to a tCBZ or tCB...
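A sketch of how a late Thumb pass might use this helper; Br and TRI are assumed to be in scope, and the comments describe the candidate pattern rather than a guaranteed transformation.

// Br is a tBcc on EQ/NE. If a "tCMPi8 rN, #0" reaches it with nothing
// redefining rN or CPSR in between, the pair is a tCBZ/tCBNZ candidate.
if (MachineInstr *Cmp = findCMPToFoldIntoCBZ(Br, TRI)) {
  // Rewrite Br to tCBZ/tCBNZ on Cmp's source register and erase Cmp.
}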
static bool isCondBranchOpcode(int Opc)
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
static bool isPushOpcode(int Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
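The customary ARM idiom for building a predicable instruction with BuildMI, combining it with the predOps and condCodeOp helpers listed elsewhere on this page; MBB, MBBI, DL, TII, DestReg and SrcReg are assumed to be in scope.

// Build an always-executed MOVr whose optional CPSR-defining operand is empty.
BuildMI(MBB, MBBI, DL, TII->get(ARM::MOVr), DestReg)
    .addReg(SrcReg)
    .add(predOps(ARMCC::AL)) // predicate: condition code + predicate register
    .add(condCodeOp());      // optional 's' (CPSR def) operand, left empty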
void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond)
static bool isVCTP(const MachineInstr *MI)
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
bool IsCPSRDead< MachineInstr >(const MachineInstr *MI)
constexpr RegState getKillRegState(bool B)
unsigned getBLXpredOpcode(const MachineFunction &MF)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
static bool isIndirectBranchOpcode(int Opc)
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI)
Return true if Reg is defined between From and To.
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Op::Description Desc
static bool isSEHInstruction(const MachineInstr &MI)
static bool isCalleeSavedRegister(MCRegister Reg, const MCPhysReg *CSRegs)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:24
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
static bool isJumpTableBranchOpcode(int Opc)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static bool isPopOpcode(int Opc)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond, unsigned Inactive)
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
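A brief illustration of how such a query might be phrased; the two constants are arbitrary and STI is an assumed ARMSubtarget pointer.

unsigned CostA = ConstantMaterializationCost(0x0000FFFFu, STI,
                                             /*ForCodesize=*/true);
unsigned CostB = ConstantMaterializationCost(0x12345678u, STI,
                                             /*ForCodesize=*/true);
// Or compare the two directly:
bool PreferFirst = HasLowerConstantMaterializationCost(
    0x0000FFFFu, 0x12345678u, STI, /*ForCodesize=*/true);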
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Register FrameReg, int &Offset, const ARMBaseInstrInfo &TII)
rewriteARMFrameIndex / rewriteT2FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
static bool isIndirectControlFlowNotComingBack(const MachineInstr &MI)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
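A minimal usage sketch, assuming MI is a MachineInstr reference in scope.

Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
if (Pred != ARMCC::AL) {
  // MI executes only when Pred holds; PredReg (normally CPSR) carries flags.
}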
unsigned getMatchingCondBranchOpcode(unsigned Opc)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
auto partition(R &&Range, UnaryPredicate P)
Provide wrappers to std::partition which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:2023
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2182
static const char * ARMCondCodeToString(ARMCC::CondCodes CC)
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned gettBLXrOpcode(const MachineFunction &MF)
static bool isSpeculationBarrierEndBBOpcode(int Opc)
unsigned getBLXOpcode(const MachineFunction &MF)
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
bool isV8EligibleForIT(const InstrType *Instr)
Definition ARMFeatures.h:24
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a des...
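A hedged call sketch; MBB, MBBI, DL and an ARMBaseInstrInfo reference TII are assumed to be in scope, and the byte count is arbitrary. The helper splits the offset into encodable chunks and emits as many ADD/SUB instructions as needed.

// Materialize R4 = SP + 1032, using more than one ADD if the offset does not
// fit a single shifter-operand immediate.
emitARMRegPlusImmediate(MBB, MBBI, DL, ARM::R4, ARM::SP, /*NumBytes=*/1032,
                        ARMCC::AL, /*PredReg=*/Register(), TII);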
constexpr RegState getUndefRegState(bool B)
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
#define N
ARM_MLxEntry - Record information about MLA / MLS instructions.
Map pseudo instructions that imply an 'S' bit onto real opcodes.
OutlinerCosts(const ARMSubtarget &target)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr LaneBitmask getAll()
Definition LaneBitmask.h:82
static constexpr LaneBitmask getNone()
Definition LaneBitmask.h:81
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
An individual sequence of instructions to be replaced with a call to an outlined function.
The information necessary to create an outlined function for some class of candidate.