AArch64InstrInfo.cpp
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
17#include "AArch64PointerAuth.h"
18#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
40#include "llvm/IR/DebugLoc.h"
41#include "llvm/IR/GlobalValue.h"
42#include "llvm/MC/MCAsmInfo.h"
43#include "llvm/MC/MCInst.h"
45#include "llvm/MC/MCInstrDesc.h"
50#include "llvm/Support/LEB128.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57#include <utility>
58
59using namespace llvm;
60
61#define GET_INSTRINFO_CTOR_DTOR
62#include "AArch64GenInstrInfo.inc"
63
65 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
66 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
67
69 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
70 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
71
72static cl::opt<unsigned>
73 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
74 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
75
76static cl::opt<unsigned>
77 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
78 cl::desc("Restrict range of B instructions (DEBUG)"));
79
80AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
81 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
82 AArch64::CATCHRET),
83 RI(STI.getTargetTriple()), Subtarget(STI) {}
84
85/// GetInstSize - Return the number of bytes of code the specified
86/// instruction may occupy. This returns the maximum number of bytes.
87unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
88 const MachineBasicBlock &MBB = *MI.getParent();
89 const MachineFunction *MF = MBB.getParent();
90 const Function &F = MF->getFunction();
91 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
92
93 {
94 auto Op = MI.getOpcode();
95 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
96 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
97 }
98
99 // Meta-instructions emit no code.
100 if (MI.isMetaInstruction())
101 return 0;
102
103 // FIXME: We currently only handle pseudoinstructions that don't get expanded
104 // before the assembly printer.
105 unsigned NumBytes = 0;
106 const MCInstrDesc &Desc = MI.getDesc();
107
108 // The size should preferably be set in
109 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
110 // The specific cases below handle instructions of variable size.
111 switch (Desc.getOpcode()) {
112 default:
113 if (Desc.getSize())
114 return Desc.getSize();
115
116 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
117 // with fixed constant size but not specified in .td file) is a normal
118 // 4-byte insn.
119 NumBytes = 4;
120 break;
121 case TargetOpcode::STACKMAP:
122 // The upper bound for a stackmap intrinsic is the full length of its shadow
123 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
124 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
125 break;
126 case TargetOpcode::PATCHPOINT:
127 // The size of the patchpoint intrinsic is the number of bytes requested
128 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
129 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
130 break;
131 case TargetOpcode::STATEPOINT:
132 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
133 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
134 // No patch bytes means a normal call inst is emitted
135 if (NumBytes == 0)
136 NumBytes = 4;
137 break;
138 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
139 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
140 // instructions are expanded to the specified number of NOPs. Otherwise,
141 // they are expanded to 36-byte XRay sleds.
142 NumBytes =
143 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
144 break;
145 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
146 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
147 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
148 NumBytes = 36;
149 break;
150 case TargetOpcode::PATCHABLE_EVENT_CALL:
151 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
152 NumBytes = 24;
153 break;
154
155 case AArch64::SPACE:
156 NumBytes = MI.getOperand(1).getImm();
157 break;
158 case TargetOpcode::BUNDLE:
159 NumBytes = getInstBundleLength(MI);
160 break;
161 }
162
163 return NumBytes;
164}
165
166unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
167 unsigned Size = 0;
168 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
169 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
170 while (++I != E && I->isInsideBundle()) {
171 assert(!I->isBundle() && "No nested bundle!");
172 Size += getInstSizeInBytes(*I);
173 }
174 return Size;
175}
176
177static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
178 SmallVectorImpl<MachineOperand> &Cond) {
179 // Block ends with fall-through condbranch.
180 switch (LastInst->getOpcode()) {
181 default:
182 llvm_unreachable("Unknown branch instruction?");
183 case AArch64::Bcc:
184 Target = LastInst->getOperand(1).getMBB();
185 Cond.push_back(LastInst->getOperand(0));
186 break;
187 case AArch64::CBZW:
188 case AArch64::CBZX:
189 case AArch64::CBNZW:
190 case AArch64::CBNZX:
191 Target = LastInst->getOperand(1).getMBB();
192 Cond.push_back(MachineOperand::CreateImm(-1));
193 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
194 Cond.push_back(LastInst->getOperand(0));
195 break;
196 case AArch64::TBZW:
197 case AArch64::TBZX:
198 case AArch64::TBNZW:
199 case AArch64::TBNZX:
200 Target = LastInst->getOperand(2).getMBB();
201 Cond.push_back(MachineOperand::CreateImm(-1));
202 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
203 Cond.push_back(LastInst->getOperand(0));
204 Cond.push_back(LastInst->getOperand(1));
205 }
206}
207
208static unsigned getBranchDisplacementBits(unsigned Opc) {
209 switch (Opc) {
210 default:
211 llvm_unreachable("unexpected opcode!");
212 case AArch64::B:
213 return BDisplacementBits;
214 case AArch64::TBNZW:
215 case AArch64::TBZW:
216 case AArch64::TBNZX:
217 case AArch64::TBZX:
218 return TBZDisplacementBits;
219 case AArch64::CBNZW:
220 case AArch64::CBZW:
221 case AArch64::CBNZX:
222 case AArch64::CBZX:
223 return CBZDisplacementBits;
224 case AArch64::Bcc:
225 return BCCDisplacementBits;
226 }
227}
228
229bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
230 int64_t BrOffset) const {
231 unsigned Bits = getBranchDisplacementBits(BranchOp);
232 assert(Bits >= 3 && "max branch displacement must be enough to jump "
233 "over conditional branch expansion");
234 return isIntN(Bits, BrOffset / 4);
235}
236
237MachineBasicBlock *
238AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
239 switch (MI.getOpcode()) {
240 default:
241 llvm_unreachable("unexpected opcode!");
242 case AArch64::B:
243 return MI.getOperand(0).getMBB();
244 case AArch64::TBZW:
245 case AArch64::TBNZW:
246 case AArch64::TBZX:
247 case AArch64::TBNZX:
248 return MI.getOperand(2).getMBB();
249 case AArch64::CBZW:
250 case AArch64::CBNZW:
251 case AArch64::CBZX:
252 case AArch64::CBNZX:
253 case AArch64::Bcc:
254 return MI.getOperand(1).getMBB();
255 }
256}
257
258void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
259 MachineBasicBlock &NewDestBB,
260 MachineBasicBlock &RestoreBB,
261 const DebugLoc &DL,
262 int64_t BrOffset,
263 RegScavenger *RS) const {
264 assert(RS && "RegScavenger required for long branching");
265 assert(MBB.empty() &&
266 "new block should be inserted for expanding unconditional branch");
267 assert(MBB.pred_size() == 1);
268 assert(RestoreBB.empty() &&
269 "restore block should be inserted for restoring clobbered registers");
270
271 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
272 // Offsets outside of the signed 33-bit range are not supported for ADRP +
273 // ADD.
274 if (!isInt<33>(BrOffset))
275 report_fatal_error(
276 "Branch offsets outside of the signed 33-bit range not supported");
277
278 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
279 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
280 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
281 .addReg(Reg)
282 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
283 .addImm(0);
284 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
285 };
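  // The lambda above emits a three-instruction sequence of the form
  //   adrp xN, DestBB            ; page address of the destination
  //   add  xN, xN, :lo12:DestBB  ; low 12 bits of the destination
  //   br   xN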
286
287 RS->enterBasicBlockEnd(MBB);
288 // If X16 is unused, we can rely on the linker to insert a range extension
289 // thunk if NewDestBB is out of range of a single B instruction.
290 constexpr Register Reg = AArch64::X16;
291 if (!RS->isRegUsed(Reg)) {
292 insertUnconditionalBranch(MBB, &NewDestBB, DL);
293 RS->setRegUsed(Reg);
294 return;
295 }
296
297 // If there's a free register and it's worth inflating the code size,
298 // manually insert the indirect branch.
299 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
300 if (Scavenged != AArch64::NoRegister &&
301 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
302 buildIndirectBranch(Scavenged, NewDestBB);
303 RS->setRegUsed(Scavenged);
304 return;
305 }
306
307 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
308 // with red zones.
309 auto *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
310 if (!AFI || AFI->hasRedZone().value_or(true))
311 report_fatal_error(
312 "Unable to insert indirect branch inside function that has red zone");
313
314 // Otherwise, spill X16 and defer range extension to the linker.
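  // The spill and reload below correspond to
  //   str x16, [sp, #-16]!   ; pre-indexed store, keeps SP 16-byte aligned
  //   ldr x16, [sp], #16     ; post-indexed reload in RestoreBB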
315 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
316 .addReg(AArch64::SP, RegState::Define)
317 .addReg(Reg)
318 .addReg(AArch64::SP)
319 .addImm(-16);
320
321 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
322
323 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
324 .addReg(AArch64::SP, RegState::Define)
325 .addReg(Reg, RegState::Define)
326 .addReg(AArch64::SP)
327 .addImm(16);
328}
329
330// Branch analysis.
331bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
332 MachineBasicBlock *&TBB,
333 MachineBasicBlock *&FBB,
334 SmallVectorImpl<MachineOperand> &Cond,
335 bool AllowModify) const {
336 // If the block has no terminators, it just falls into the block after it.
337 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
338 if (I == MBB.end())
339 return false;
340
341 // Skip over SpeculationBarrierEndBB terminators
342 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
343 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
344 --I;
345 }
346
347 if (!isUnpredicatedTerminator(*I))
348 return false;
349
350 // Get the last instruction in the block.
351 MachineInstr *LastInst = &*I;
352
353 // If there is only one terminator instruction, process it.
354 unsigned LastOpc = LastInst->getOpcode();
355 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
356 if (isUncondBranchOpcode(LastOpc)) {
357 TBB = LastInst->getOperand(0).getMBB();
358 return false;
359 }
360 if (isCondBranchOpcode(LastOpc)) {
361 // Block ends with fall-through condbranch.
362 parseCondBranch(LastInst, TBB, Cond);
363 return false;
364 }
365 return true; // Can't handle indirect branch.
366 }
367
368 // Get the instruction before it if it is a terminator.
369 MachineInstr *SecondLastInst = &*I;
370 unsigned SecondLastOpc = SecondLastInst->getOpcode();
371
372 // If AllowModify is true and the block ends with two or more unconditional
373 // branches, delete all but the first unconditional branch.
374 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
375 while (isUncondBranchOpcode(SecondLastOpc)) {
376 LastInst->eraseFromParent();
377 LastInst = SecondLastInst;
378 LastOpc = LastInst->getOpcode();
379 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
380 // Return now; the only terminator is an unconditional branch.
381 TBB = LastInst->getOperand(0).getMBB();
382 return false;
383 }
384 SecondLastInst = &*I;
385 SecondLastOpc = SecondLastInst->getOpcode();
386 }
387 }
388
389 // If we're allowed to modify and the block ends in an unconditional branch
390 // which could simply fallthrough, remove the branch. (Note: This case only
391 // matters when we can't understand the whole sequence, otherwise it's also
392 // handled by BranchFolding.cpp.)
393 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
394 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
395 LastInst->eraseFromParent();
396 LastInst = SecondLastInst;
397 LastOpc = LastInst->getOpcode();
398 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
399 assert(!isUncondBranchOpcode(LastOpc) &&
400 "unreachable unconditional branches removed above");
401
402 if (isCondBranchOpcode(LastOpc)) {
403 // Block ends with fall-through condbranch.
404 parseCondBranch(LastInst, TBB, Cond);
405 return false;
406 }
407 return true; // Can't handle indirect branch.
408 }
409 SecondLastInst = &*I;
410 SecondLastOpc = SecondLastInst->getOpcode();
411 }
412
413 // If there are three terminators, we don't know what sort of block this is.
414 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
415 return true;
416
417 // If the block ends with a B and a Bcc, handle it.
418 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
419 parseCondBranch(SecondLastInst, TBB, Cond);
420 FBB = LastInst->getOperand(0).getMBB();
421 return false;
422 }
423
424 // If the block ends with two unconditional branches, handle it. The second
425 // one is not executed, so remove it.
426 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
427 TBB = SecondLastInst->getOperand(0).getMBB();
428 I = LastInst;
429 if (AllowModify)
430 I->eraseFromParent();
431 return false;
432 }
433
434 // ...likewise if it ends with an indirect branch followed by an unconditional
435 // branch.
436 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
437 I = LastInst;
438 if (AllowModify)
439 I->eraseFromParent();
440 return true;
441 }
442
443 // Otherwise, can't handle this.
444 return true;
445}
446
447bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
448 MachineBranchPredicate &MBP,
449 bool AllowModify) const {
450 // For the moment, handle only a block which ends with a cb(n)zx followed by
451 // a fallthrough. Why this? Because it is a common form.
452 // TODO: Should we handle b.cc?
453
454 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
455 if (I == MBB.end())
456 return true;
457
458 // Skip over SpeculationBarrierEndBB terminators
459 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
460 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
461 --I;
462 }
463
464 if (!isUnpredicatedTerminator(*I))
465 return true;
466
467 // Get the last instruction in the block.
468 MachineInstr *LastInst = &*I;
469 unsigned LastOpc = LastInst->getOpcode();
470 if (!isCondBranchOpcode(LastOpc))
471 return true;
472
473 switch (LastOpc) {
474 default:
475 return true;
476 case AArch64::CBZW:
477 case AArch64::CBZX:
478 case AArch64::CBNZW:
479 case AArch64::CBNZX:
480 break;
481 };
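  // The cb(n)z above is modelled as the predicate (LHS ==/!= 0), with the
  // branch target as TrueDest and the next block as FalseDest.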
482
483 MBP.TrueDest = LastInst->getOperand(1).getMBB();
484 assert(MBP.TrueDest && "expected!");
485 MBP.FalseDest = MBB.getNextNode();
486
487 MBP.ConditionDef = nullptr;
488 MBP.SingleUseCondition = false;
489
490 MBP.LHS = LastInst->getOperand(0);
491 MBP.RHS = MachineOperand::CreateImm(0);
492 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
493 : MachineBranchPredicate::PRED_EQ;
494 return false;
495}
496
497bool AArch64InstrInfo::reverseBranchCondition(
498 SmallVectorImpl<MachineOperand> &Cond) const {
499 if (Cond[0].getImm() != -1) {
500 // Regular Bcc
501 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
502 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
503 } else {
504 // Folded compare-and-branch
505 switch (Cond[1].getImm()) {
506 default:
507 llvm_unreachable("Unknown conditional branch!");
508 case AArch64::CBZW:
509 Cond[1].setImm(AArch64::CBNZW);
510 break;
511 case AArch64::CBNZW:
512 Cond[1].setImm(AArch64::CBZW);
513 break;
514 case AArch64::CBZX:
515 Cond[1].setImm(AArch64::CBNZX);
516 break;
517 case AArch64::CBNZX:
518 Cond[1].setImm(AArch64::CBZX);
519 break;
520 case AArch64::TBZW:
521 Cond[1].setImm(AArch64::TBNZW);
522 break;
523 case AArch64::TBNZW:
524 Cond[1].setImm(AArch64::TBZW);
525 break;
526 case AArch64::TBZX:
527 Cond[1].setImm(AArch64::TBNZX);
528 break;
529 case AArch64::TBNZX:
530 Cond[1].setImm(AArch64::TBZX);
531 break;
532 }
533 }
534
535 return false;
536}
537
538unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
539 int *BytesRemoved) const {
540 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
541 if (I == MBB.end())
542 return 0;
543
544 if (!isUncondBranchOpcode(I->getOpcode()) &&
545 !isCondBranchOpcode(I->getOpcode()))
546 return 0;
547
548 // Remove the branch.
549 I->eraseFromParent();
550
551 I = MBB.end();
552
553 if (I == MBB.begin()) {
554 if (BytesRemoved)
555 *BytesRemoved = 4;
556 return 1;
557 }
558 --I;
559 if (!isCondBranchOpcode(I->getOpcode())) {
560 if (BytesRemoved)
561 *BytesRemoved = 4;
562 return 1;
563 }
564
565 // Remove the branch.
566 I->eraseFromParent();
567 if (BytesRemoved)
568 *BytesRemoved = 8;
569
570 return 2;
571}
572
573void AArch64InstrInfo::instantiateCondBranch(
574 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
575 ArrayRef<MachineOperand> Cond) const {
576 if (Cond[0].getImm() != -1) {
577 // Regular Bcc
578 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
579 } else {
580 // Folded compare-and-branch
581 // Note that we use MachineInstrBuilder::add instead of addReg to keep the flags.
582 const MachineInstrBuilder MIB =
583 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
584 if (Cond.size() > 3)
585 MIB.addImm(Cond[3].getImm());
586 MIB.addMBB(TBB);
587 }
588}
589
590unsigned AArch64InstrInfo::insertBranch(
591 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
592 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
593 // Shouldn't be a fall through.
594 assert(TBB && "insertBranch must not be told to insert a fallthrough");
595
596 if (!FBB) {
597 if (Cond.empty()) // Unconditional branch?
598 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
599 else
600 instantiateCondBranch(MBB, DL, TBB, Cond);
601
602 if (BytesAdded)
603 *BytesAdded = 4;
604
605 return 1;
606 }
607
608 // Two-way conditional branch.
609 instantiateCondBranch(MBB, DL, TBB, Cond);
610 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
611
612 if (BytesAdded)
613 *BytesAdded = 8;
614
615 return 2;
616}
617
618// Find the original register that VReg is copied from.
619static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
620 while (Register::isVirtualRegister(VReg)) {
621 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
622 if (!DefMI->isFullCopy())
623 return VReg;
624 VReg = DefMI->getOperand(1).getReg();
625 }
626 return VReg;
627}
628
629// Determine if VReg is defined by an instruction that can be folded into a
630// csel instruction. If so, return the folded opcode, and the replacement
631// register.
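// For example, if TrueReg is defined by 'add %x, 1', the select can instead be
// emitted as 'csinc dst, FalseReg, %x, <inverted cc>' and the add becomes dead.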
632static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
633 unsigned *NewVReg = nullptr) {
634 VReg = removeCopies(MRI, VReg);
635 if (!Register::isVirtualRegister(VReg))
636 return 0;
637
638 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
639 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
640 unsigned Opc = 0;
641 unsigned SrcOpNum = 0;
642 switch (DefMI->getOpcode()) {
643 case AArch64::ADDSXri:
644 case AArch64::ADDSWri:
645 // if NZCV is used, do not fold.
646 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
647 return 0;
648 // fall-through to ADDXri and ADDWri.
649 [[fallthrough]];
650 case AArch64::ADDXri:
651 case AArch64::ADDWri:
652 // add x, 1 -> csinc.
653 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
654 DefMI->getOperand(3).getImm() != 0)
655 return 0;
656 SrcOpNum = 1;
657 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
658 break;
659
660 case AArch64::ORNXrr:
661 case AArch64::ORNWrr: {
662 // not x -> csinv, represented as orn dst, xzr, src.
663 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
664 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
665 return 0;
666 SrcOpNum = 2;
667 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
668 break;
669 }
670
671 case AArch64::SUBSXrr:
672 case AArch64::SUBSWrr:
673 // if NZCV is used, do not fold.
674 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
675 return 0;
676 // fall-through to SUBXrr and SUBWrr.
677 [[fallthrough]];
678 case AArch64::SUBXrr:
679 case AArch64::SUBWrr: {
680 // neg x -> csneg, represented as sub dst, xzr, src.
681 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
682 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
683 return 0;
684 SrcOpNum = 2;
685 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
686 break;
687 }
688 default:
689 return 0;
690 }
691 assert(Opc && SrcOpNum && "Missing parameters");
692
693 if (NewVReg)
694 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
695 return Opc;
696}
697
698bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
699 ArrayRef<MachineOperand> Cond,
700 Register DstReg, Register TrueReg,
701 Register FalseReg, int &CondCycles,
702 int &TrueCycles,
703 int &FalseCycles) const {
704 // Check register classes.
705 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
706 const TargetRegisterClass *RC =
707 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
708 if (!RC)
709 return false;
710
711 // Also need to check the dest regclass, in case we're trying to optimize
712 // something like:
713 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
714 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
715 return false;
716
717 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
718 unsigned ExtraCondLat = Cond.size() != 1;
719
720 // GPRs are handled by csel.
721 // FIXME: Fold in x+1, -x, and ~x when applicable.
722 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
723 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
724 // Single-cycle csel, csinc, csinv, and csneg.
725 CondCycles = 1 + ExtraCondLat;
726 TrueCycles = FalseCycles = 1;
727 if (canFoldIntoCSel(MRI, TrueReg))
728 TrueCycles = 0;
729 else if (canFoldIntoCSel(MRI, FalseReg))
730 FalseCycles = 0;
731 return true;
732 }
733
734 // Scalar floating point is handled by fcsel.
735 // FIXME: Form fabs, fmin, and fmax when applicable.
736 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
737 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
738 CondCycles = 5 + ExtraCondLat;
739 TrueCycles = FalseCycles = 2;
740 return true;
741 }
742
743 // Can't do vectors.
744 return false;
745}
746
747void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
748 MachineBasicBlock::iterator I,
749 const DebugLoc &DL, Register DstReg,
750 ArrayRef<MachineOperand> Cond,
751 Register TrueReg, Register FalseReg) const {
752 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
753
754 // Parse the condition code, see parseCondBranch() above.
755 AArch64CC::CondCode CC;
756 switch (Cond.size()) {
757 default:
758 llvm_unreachable("Unknown condition opcode in Cond");
759 case 1: // b.cc
760 CC = AArch64CC::CondCode(Cond[0].getImm());
761 break;
762 case 3: { // cbz/cbnz
763 // We must insert a compare against 0.
764 bool Is64Bit;
765 switch (Cond[1].getImm()) {
766 default:
767 llvm_unreachable("Unknown branch opcode in Cond");
768 case AArch64::CBZW:
769 Is64Bit = false;
770 CC = AArch64CC::EQ;
771 break;
772 case AArch64::CBZX:
773 Is64Bit = true;
774 CC = AArch64CC::EQ;
775 break;
776 case AArch64::CBNZW:
777 Is64Bit = false;
778 CC = AArch64CC::NE;
779 break;
780 case AArch64::CBNZX:
781 Is64Bit = true;
782 CC = AArch64CC::NE;
783 break;
784 }
785 Register SrcReg = Cond[2].getReg();
786 if (Is64Bit) {
787 // cmp reg, #0 is actually subs xzr, reg, #0.
788 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
789 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
790 .addReg(SrcReg)
791 .addImm(0)
792 .addImm(0);
793 } else {
794 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
795 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
796 .addReg(SrcReg)
797 .addImm(0)
798 .addImm(0);
799 }
800 break;
801 }
802 case 4: { // tbz/tbnz
803 // We must insert a tst instruction.
804 switch (Cond[1].getImm()) {
805 default:
806 llvm_unreachable("Unknown branch opcode in Cond");
807 case AArch64::TBZW:
808 case AArch64::TBZX:
809 CC = AArch64CC::EQ;
810 break;
811 case AArch64::TBNZW:
812 case AArch64::TBNZX:
813 CC = AArch64CC::NE;
814 break;
815 }
816 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
817 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
818 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
819 .addReg(Cond[2].getReg())
820 .addImm(
821 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
822 else
823 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
824 .addReg(Cond[2].getReg())
825 .addImm(
826 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
827 break;
828 }
829 }
830
831 unsigned Opc = 0;
832 const TargetRegisterClass *RC = nullptr;
833 bool TryFold = false;
834 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
835 RC = &AArch64::GPR64RegClass;
836 Opc = AArch64::CSELXr;
837 TryFold = true;
838 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
839 RC = &AArch64::GPR32RegClass;
840 Opc = AArch64::CSELWr;
841 TryFold = true;
842 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
843 RC = &AArch64::FPR64RegClass;
844 Opc = AArch64::FCSELDrrr;
845 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
846 RC = &AArch64::FPR32RegClass;
847 Opc = AArch64::FCSELSrrr;
848 }
849 assert(RC && "Unsupported regclass");
850
851 // Try folding simple instructions into the csel.
852 if (TryFold) {
853 unsigned NewVReg = 0;
854 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
855 if (FoldedOpc) {
856 // The folded opcodes csinc, csinv and csneg apply the operation to
857 // FalseReg, so we need to invert the condition.
858 CC = AArch64CC::getInvertedCondCode(CC);
859 TrueReg = FalseReg;
860 } else
861 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
862
863 // Fold the operation. Leave any dead instructions for DCE to clean up.
864 if (FoldedOpc) {
865 FalseReg = NewVReg;
866 Opc = FoldedOpc;
867 // This extends the live range of NewVReg.
868 MRI.clearKillFlags(NewVReg);
869 }
870 }
871
872 // Pull all virtual registers into the appropriate class.
873 MRI.constrainRegClass(TrueReg, RC);
874 MRI.constrainRegClass(FalseReg, RC);
875
876 // Insert the csel.
877 BuildMI(MBB, I, DL, get(Opc), DstReg)
878 .addReg(TrueReg)
879 .addReg(FalseReg)
880 .addImm(CC);
881}
882
883// Return true if Imm can be loaded into a register by a "cheap" sequence of
884// instructions. For now, "cheap" means at most two instructions.
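// For 64-bit immediates, 0xffff needs a single MOVZ and 0x123400005678 needs
// MOVZ+MOVK, so both count as cheap; an immediate requiring three or more
// instructions to materialize does not.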
885static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
886 if (BitSize == 32)
887 return true;
888
889 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
890 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
891 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
892 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
893
894 return Is.size() <= 2;
895}
896
897// FIXME: this implementation should be micro-architecture dependent, so a
898// micro-architecture target hook should be introduced here in future.
899bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
900 if (Subtarget.hasExynosCheapAsMoveHandling()) {
901 if (isExynosCheapAsMove(MI))
902 return true;
903 return MI.isAsCheapAsAMove();
904 }
905
906 switch (MI.getOpcode()) {
907 default:
908 return MI.isAsCheapAsAMove();
909
910 case AArch64::ADDWrs:
911 case AArch64::ADDXrs:
912 case AArch64::SUBWrs:
913 case AArch64::SUBXrs:
914 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
915
916 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
917 // ORRXri, it is as cheap as MOV.
918 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
919 case AArch64::MOVi32imm:
920 return isCheapImmediate(MI, 32);
921 case AArch64::MOVi64imm:
922 return isCheapImmediate(MI, 64);
923 }
924}
925
926bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
927 switch (MI.getOpcode()) {
928 default:
929 return false;
930
931 case AArch64::ADDWrs:
932 case AArch64::ADDXrs:
933 case AArch64::ADDSWrs:
934 case AArch64::ADDSXrs: {
935 unsigned Imm = MI.getOperand(3).getImm();
936 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
937 if (ShiftVal == 0)
938 return true;
939 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
940 }
941
942 case AArch64::ADDWrx:
943 case AArch64::ADDXrx:
944 case AArch64::ADDXrx64:
945 case AArch64::ADDSWrx:
946 case AArch64::ADDSXrx:
947 case AArch64::ADDSXrx64: {
948 unsigned Imm = MI.getOperand(3).getImm();
949 switch (AArch64_AM::getArithExtendType(Imm)) {
950 default:
951 return false;
952 case AArch64_AM::UXTB:
953 case AArch64_AM::UXTH:
954 case AArch64_AM::UXTW:
955 case AArch64_AM::UXTX:
956 return AArch64_AM::getArithShiftValue(Imm) <= 4;
957 }
958 }
959
960 case AArch64::SUBWrs:
961 case AArch64::SUBSWrs: {
962 unsigned Imm = MI.getOperand(3).getImm();
963 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
964 return ShiftVal == 0 ||
965 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
966 }
967
968 case AArch64::SUBXrs:
969 case AArch64::SUBSXrs: {
970 unsigned Imm = MI.getOperand(3).getImm();
971 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
972 return ShiftVal == 0 ||
973 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
974 }
975
976 case AArch64::SUBWrx:
977 case AArch64::SUBXrx:
978 case AArch64::SUBXrx64:
979 case AArch64::SUBSWrx:
980 case AArch64::SUBSXrx:
981 case AArch64::SUBSXrx64: {
982 unsigned Imm = MI.getOperand(3).getImm();
983 switch (AArch64_AM::getArithExtendType(Imm)) {
984 default:
985 return false;
986 case AArch64_AM::UXTB:
987 case AArch64_AM::UXTH:
988 case AArch64_AM::UXTW:
989 case AArch64_AM::UXTX:
990 return AArch64_AM::getArithShiftValue(Imm) == 0;
991 }
992 }
993
994 case AArch64::LDRBBroW:
995 case AArch64::LDRBBroX:
996 case AArch64::LDRBroW:
997 case AArch64::LDRBroX:
998 case AArch64::LDRDroW:
999 case AArch64::LDRDroX:
1000 case AArch64::LDRHHroW:
1001 case AArch64::LDRHHroX:
1002 case AArch64::LDRHroW:
1003 case AArch64::LDRHroX:
1004 case AArch64::LDRQroW:
1005 case AArch64::LDRQroX:
1006 case AArch64::LDRSBWroW:
1007 case AArch64::LDRSBWroX:
1008 case AArch64::LDRSBXroW:
1009 case AArch64::LDRSBXroX:
1010 case AArch64::LDRSHWroW:
1011 case AArch64::LDRSHWroX:
1012 case AArch64::LDRSHXroW:
1013 case AArch64::LDRSHXroX:
1014 case AArch64::LDRSWroW:
1015 case AArch64::LDRSWroX:
1016 case AArch64::LDRSroW:
1017 case AArch64::LDRSroX:
1018 case AArch64::LDRWroW:
1019 case AArch64::LDRWroX:
1020 case AArch64::LDRXroW:
1021 case AArch64::LDRXroX:
1022 case AArch64::PRFMroW:
1023 case AArch64::PRFMroX:
1024 case AArch64::STRBBroW:
1025 case AArch64::STRBBroX:
1026 case AArch64::STRBroW:
1027 case AArch64::STRBroX:
1028 case AArch64::STRDroW:
1029 case AArch64::STRDroX:
1030 case AArch64::STRHHroW:
1031 case AArch64::STRHHroX:
1032 case AArch64::STRHroW:
1033 case AArch64::STRHroX:
1034 case AArch64::STRQroW:
1035 case AArch64::STRQroX:
1036 case AArch64::STRSroW:
1037 case AArch64::STRSroX:
1038 case AArch64::STRWroW:
1039 case AArch64::STRWroX:
1040 case AArch64::STRXroW:
1041 case AArch64::STRXroX: {
1042 unsigned IsSigned = MI.getOperand(3).getImm();
1043 return !IsSigned;
1044 }
1045 }
1046}
1047
1048bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1049 unsigned Opc = MI.getOpcode();
1050 switch (Opc) {
1051 default:
1052 return false;
1053 case AArch64::SEH_StackAlloc:
1054 case AArch64::SEH_SaveFPLR:
1055 case AArch64::SEH_SaveFPLR_X:
1056 case AArch64::SEH_SaveReg:
1057 case AArch64::SEH_SaveReg_X:
1058 case AArch64::SEH_SaveRegP:
1059 case AArch64::SEH_SaveRegP_X:
1060 case AArch64::SEH_SaveFReg:
1061 case AArch64::SEH_SaveFReg_X:
1062 case AArch64::SEH_SaveFRegP:
1063 case AArch64::SEH_SaveFRegP_X:
1064 case AArch64::SEH_SetFP:
1065 case AArch64::SEH_AddFP:
1066 case AArch64::SEH_Nop:
1067 case AArch64::SEH_PrologEnd:
1068 case AArch64::SEH_EpilogStart:
1069 case AArch64::SEH_EpilogEnd:
1070 case AArch64::SEH_PACSignLR:
1071 case AArch64::SEH_SaveAnyRegQP:
1072 case AArch64::SEH_SaveAnyRegQPX:
1073 return true;
1074 }
1075}
1076
1077bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1078 Register &SrcReg, Register &DstReg,
1079 unsigned &SubIdx) const {
1080 switch (MI.getOpcode()) {
1081 default:
1082 return false;
1083 case AArch64::SBFMXri: // aka sxtw
1084 case AArch64::UBFMXri: // aka uxtw
1085 // Check for the 32 -> 64 bit extension case, these instructions can do
1086 // much more.
1087 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1088 return false;
1089 // This is a signed or unsigned 32 -> 64 bit extension.
1090 SrcReg = MI.getOperand(1).getReg();
1091 DstReg = MI.getOperand(0).getReg();
1092 SubIdx = AArch64::sub_32;
1093 return true;
1094 }
1095}
1096
1097bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1098 const MachineInstr &MIa, const MachineInstr &MIb) const {
1099 const TargetRegisterInfo *TRI = &getRegisterInfo();
1100 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1101 int64_t OffsetA = 0, OffsetB = 0;
1102 TypeSize WidthA(0, false), WidthB(0, false);
1103 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1104
1105 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1106 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1107
1108 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1109 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1110 return false;
1111
1112 // Retrieve the base, the offset from the base, and the width. Width is
1113 // the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If the
1114 // bases are identical, and the offset of the lower memory access plus its
1115 // width does not overlap the offset of the higher memory access,
1116 // then the memory accesses are disjoint.
1117 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1118 // are assumed to have the same scale (vscale).
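  // For example, 'ldr x0, [x1, #8]' (width 8) and 'str x2, [x1, #16]' cannot
  // alias because 8 + 8 <= 16.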
1119 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1120 WidthA, TRI) &&
1121 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1122 WidthB, TRI)) {
1123 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1124 OffsetAIsScalable == OffsetBIsScalable) {
1125 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1126 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1127 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1128 if (LowWidth.isScalable() == OffsetAIsScalable &&
1129 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1130 return true;
1131 }
1132 }
1133 return false;
1134}
1135
1136bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1137 const MachineBasicBlock *MBB,
1138 const MachineFunction &MF) const {
1139 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1140 return true;
1141
1142 // Do not move an instruction that can be recognized as a branch target.
1143 if (hasBTISemantics(MI))
1144 return true;
1145
1146 switch (MI.getOpcode()) {
1147 case AArch64::HINT:
1148 // CSDB hints are scheduling barriers.
1149 if (MI.getOperand(0).getImm() == 0x14)
1150 return true;
1151 break;
1152 case AArch64::DSB:
1153 case AArch64::ISB:
1154 // DSB and ISB also are scheduling barriers.
1155 return true;
1156 case AArch64::MSRpstatesvcrImm1:
1157 // SMSTART and SMSTOP are also scheduling barriers.
1158 return true;
1159 default:;
1160 }
1161 if (isSEHInstruction(MI))
1162 return true;
1163 auto Next = std::next(MI.getIterator());
1164 return Next != MBB->end() && Next->isCFIInstruction();
1165}
1166
1167/// analyzeCompare - For a comparison instruction, return the source registers
1168/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1169/// Return true if the comparison instruction can be analyzed.
1170bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1171 Register &SrcReg2, int64_t &CmpMask,
1172 int64_t &CmpValue) const {
1173 // The first operand can be a frame index where we'd normally expect a
1174 // register.
1175 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1176 if (!MI.getOperand(1).isReg())
1177 return false;
1178
1179 switch (MI.getOpcode()) {
1180 default:
1181 break;
1182 case AArch64::PTEST_PP:
1183 case AArch64::PTEST_PP_ANY:
1184 SrcReg = MI.getOperand(0).getReg();
1185 SrcReg2 = MI.getOperand(1).getReg();
1186 // Not sure about the mask and value for now...
1187 CmpMask = ~0;
1188 CmpValue = 0;
1189 return true;
1190 case AArch64::SUBSWrr:
1191 case AArch64::SUBSWrs:
1192 case AArch64::SUBSWrx:
1193 case AArch64::SUBSXrr:
1194 case AArch64::SUBSXrs:
1195 case AArch64::SUBSXrx:
1196 case AArch64::ADDSWrr:
1197 case AArch64::ADDSWrs:
1198 case AArch64::ADDSWrx:
1199 case AArch64::ADDSXrr:
1200 case AArch64::ADDSXrs:
1201 case AArch64::ADDSXrx:
1202 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1203 SrcReg = MI.getOperand(1).getReg();
1204 SrcReg2 = MI.getOperand(2).getReg();
1205 CmpMask = ~0;
1206 CmpValue = 0;
1207 return true;
1208 case AArch64::SUBSWri:
1209 case AArch64::ADDSWri:
1210 case AArch64::SUBSXri:
1211 case AArch64::ADDSXri:
1212 SrcReg = MI.getOperand(1).getReg();
1213 SrcReg2 = 0;
1214 CmpMask = ~0;
1215 CmpValue = MI.getOperand(2).getImm();
1216 return true;
1217 case AArch64::ANDSWri:
1218 case AArch64::ANDSXri:
1219 // ANDS does not use the same encoding scheme as the other xxxS
1220 // instructions.
1221 SrcReg = MI.getOperand(1).getReg();
1222 SrcReg2 = 0;
1223 CmpMask = ~0;
1224 CmpValue = AArch64_AM::decodeLogicalImmediate(
1225 MI.getOperand(2).getImm(),
1226 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1227 return true;
1228 }
1229
1230 return false;
1231}
1232
1233static bool UpdateOperandRegClass(MachineInstr &Instr) {
1234 MachineBasicBlock *MBB = Instr.getParent();
1235 assert(MBB && "Can't get MachineBasicBlock here");
1236 MachineFunction *MF = MBB->getParent();
1237 assert(MF && "Can't get MachineFunction here");
1238 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1239 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1240 MachineRegisterInfo *MRI = &MF->getRegInfo();
1241
1242 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1243 ++OpIdx) {
1244 MachineOperand &MO = Instr.getOperand(OpIdx);
1245 const TargetRegisterClass *OpRegCstraints =
1246 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1247
1248 // If there's no constraint, there's nothing to do.
1249 if (!OpRegCstraints)
1250 continue;
1251 // If the operand is a frame index, there's nothing to do here.
1252 // A frame index operand will resolve correctly during PEI.
1253 if (MO.isFI())
1254 continue;
1255
1256 assert(MO.isReg() &&
1257 "Operand has register constraints without being a register!");
1258
1259 Register Reg = MO.getReg();
1260 if (Reg.isPhysical()) {
1261 if (!OpRegCstraints->contains(Reg))
1262 return false;
1263 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1264 !MRI->constrainRegClass(Reg, OpRegCstraints))
1265 return false;
1266 }
1267
1268 return true;
1269}
1270
1271/// Return the opcode that does not set flags when possible - otherwise
1272/// return the original opcode. The caller is responsible to do the actual
1273/// substitution and legality checking.
1274unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1275 // Don't convert all compare instructions, because for some the zero register
1276 // encoding becomes the sp register.
1277 bool MIDefinesZeroReg = false;
1278 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1279 MIDefinesZeroReg = true;
1280
1281 switch (MI.getOpcode()) {
1282 default:
1283 return MI.getOpcode();
1284 case AArch64::ADDSWrr:
1285 return AArch64::ADDWrr;
1286 case AArch64::ADDSWri:
1287 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1288 case AArch64::ADDSWrs:
1289 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1290 case AArch64::ADDSWrx:
1291 return AArch64::ADDWrx;
1292 case AArch64::ADDSXrr:
1293 return AArch64::ADDXrr;
1294 case AArch64::ADDSXri:
1295 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1296 case AArch64::ADDSXrs:
1297 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1298 case AArch64::ADDSXrx:
1299 return AArch64::ADDXrx;
1300 case AArch64::SUBSWrr:
1301 return AArch64::SUBWrr;
1302 case AArch64::SUBSWri:
1303 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1304 case AArch64::SUBSWrs:
1305 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1306 case AArch64::SUBSWrx:
1307 return AArch64::SUBWrx;
1308 case AArch64::SUBSXrr:
1309 return AArch64::SUBXrr;
1310 case AArch64::SUBSXri:
1311 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1312 case AArch64::SUBSXrs:
1313 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1314 case AArch64::SUBSXrx:
1315 return AArch64::SUBXrx;
1316 }
1317}
1318
1319enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1320
1321/// True when condition flags are accessed (either by writing or reading)
1322/// on the instruction trace starting at From and ending at To.
1323///
1324/// Note: If From and To are from different blocks it's assumed the CC flags
1325/// are accessed on the path.
1326static bool areCFlagsAccessedBetweenInstrs(
1327 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1328 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1329 // Early exit if To is at the beginning of the BB.
1330 if (To == To->getParent()->begin())
1331 return true;
1332
1333 // Check whether the instructions are in the same basic block
1334 // If not, assume the condition flags might get modified somewhere.
1335 if (To->getParent() != From->getParent())
1336 return true;
1337
1338 // From must be above To.
1339 assert(std::any_of(
1340 ++To.getReverse(), To->getParent()->rend(),
1341 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1342
1343 // We iterate backward starting at \p To until we hit \p From.
1344 for (const MachineInstr &Instr :
1345 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1346 if (((AccessToCheck & AK_Write) &&
1347 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1348 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1349 return true;
1350 }
1351 return false;
1352}
1353
1354/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1355/// operation which could set the flags in an identical manner
1356bool AArch64InstrInfo::optimizePTestInstr(
1357 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1358 const MachineRegisterInfo *MRI) const {
1359 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1360 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1361 auto NewOp = Pred->getOpcode();
1362 bool OpChanged = false;
1363
1364 unsigned MaskOpcode = Mask->getOpcode();
1365 unsigned PredOpcode = Pred->getOpcode();
1366 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1367 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1368
1369 if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike) &&
1370 getElementSizeForOpcode(MaskOpcode) ==
1371 getElementSizeForOpcode(PredOpcode) &&
1372 Mask->getOperand(1).getImm() == 31) {
1373 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1374 // redundant since WHILE performs an implicit PTEST with an all active
1375 // mask. Must be an all active predicate of matching element size.
1376
1377 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1378 // PTEST_LIKE instruction uses the same all active mask and the element
1379 // size matches. If the PTEST has a condition of any then it is always
1380 // redundant.
1381 if (PredIsPTestLike) {
1382 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1383 if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
1384 return false;
1385 }
1386
1387 // Fallthrough to simply remove the PTEST.
1388 } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
1389 PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
1390 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1391 // instruction that sets the flags as PTEST would. This is only valid when
1392 // the condition is any.
1393
1394 // Fallthrough to simply remove the PTEST.
1395 } else if (PredIsPTestLike) {
1396 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1397 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1398 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1399 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1400 // performed by the compare could consider fewer lanes for these element
1401 // sizes.
1402 //
1403 // For example, consider
1404 //
1405 // ptrue p0.b ; P0=1111-1111-1111-1111
1406 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1407 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1408 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1409 // ; ^ last active
1410 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1411 // ; ^ last active
1412 //
1413 // where the compare generates a canonical all active 32-bit predicate
1414 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1415 // active flag, whereas the PTEST instruction with the same mask doesn't.
1416 // For PTEST_ANY this doesn't apply as the flags in this case would be
1417 // identical regardless of element size.
1418 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1419 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1420 if ((Mask != PTestLikeMask) ||
1421 (PredElementSize != AArch64::ElementSizeB &&
1422 PTest->getOpcode() != AArch64::PTEST_PP_ANY))
1423 return false;
1424
1425 // Fallthrough to simply remove the PTEST.
1426 } else {
1427 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1428 // opcode so the PTEST becomes redundant.
1429 switch (PredOpcode) {
1430 case AArch64::AND_PPzPP:
1431 case AArch64::BIC_PPzPP:
1432 case AArch64::EOR_PPzPP:
1433 case AArch64::NAND_PPzPP:
1434 case AArch64::NOR_PPzPP:
1435 case AArch64::ORN_PPzPP:
1436 case AArch64::ORR_PPzPP:
1437 case AArch64::BRKA_PPzP:
1438 case AArch64::BRKPA_PPzPP:
1439 case AArch64::BRKB_PPzP:
1440 case AArch64::BRKPB_PPzPP:
1441 case AArch64::RDFFR_PPz: {
1442 // Check to see if our mask is the same. If not the resulting flag bits
1443 // may be different and we can't remove the ptest.
1444 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1445 if (Mask != PredMask)
1446 return false;
1447 break;
1448 }
1449 case AArch64::BRKN_PPzP: {
1450 // BRKN uses an all active implicit mask to set flags unlike the other
1451 // flag-setting instructions.
1452 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1453 if ((MaskOpcode != AArch64::PTRUE_B) ||
1454 (Mask->getOperand(1).getImm() != 31))
1455 return false;
1456 break;
1457 }
1458 case AArch64::PTRUE_B:
1459 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1460 break;
1461 default:
1462 // Bail out if we don't recognize the input
1463 return false;
1464 }
1465
1466 NewOp = convertToFlagSettingOpc(PredOpcode);
1467 OpChanged = true;
1468 }
1469
1470 const TargetRegisterInfo *TRI = &getRegisterInfo();
1471
1472 // If another instruction between Pred and PTest accesses flags, don't remove
1473 // the ptest or update the earlier instruction to modify them.
1474 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1475 return false;
1476
1477 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1478 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1479 // operand to be replaced with an equivalent instruction that also sets the
1480 // flags.
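  // For example, an AND_PPzPP feeding the PTEST is rewritten to its
  // flag-setting ANDS form, which updates NZCV exactly as the PTEST would.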
1481 Pred->setDesc(get(NewOp));
1482 PTest->eraseFromParent();
1483 if (OpChanged) {
1484 bool succeeded = UpdateOperandRegClass(*Pred);
1485 (void)succeeded;
1486 assert(succeeded && "Operands have incompatible register classes!");
1487 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1488 }
1489
1490 // Ensure that the flags def is live.
1491 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1492 unsigned i = 0, e = Pred->getNumOperands();
1493 for (; i != e; ++i) {
1494 MachineOperand &MO = Pred->getOperand(i);
1495 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1496 MO.setIsDead(false);
1497 break;
1498 }
1499 }
1500 }
1501 return true;
1502}
1503
1504/// Try to optimize a compare instruction. A compare instruction is an
1505/// instruction which produces AArch64::NZCV. It is a true compare
1506/// instruction when there are no uses of its destination register.
1508///
1509/// The following steps are tried in order:
1510/// 1. Convert CmpInstr into an unconditional version.
1511/// 2. Remove CmpInstr if above there is an instruction producing a needed
1512/// condition code or an instruction which can be converted into such an
1513/// instruction.
1514/// Only comparison with zero is supported.
1515bool AArch64InstrInfo::optimizeCompareInstr(
1516 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1517 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1518 assert(CmpInstr.getParent());
1519 assert(MRI);
1520
1521 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1522 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1523 if (DeadNZCVIdx != -1) {
1524 if (CmpInstr.definesRegister(AArch64::WZR) ||
1525 CmpInstr.definesRegister(AArch64::XZR)) {
1526 CmpInstr.eraseFromParent();
1527 return true;
1528 }
1529 unsigned Opc = CmpInstr.getOpcode();
1530 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1531 if (NewOpc == Opc)
1532 return false;
1533 const MCInstrDesc &MCID = get(NewOpc);
1534 CmpInstr.setDesc(MCID);
1535 CmpInstr.removeOperand(DeadNZCVIdx);
1536 bool succeeded = UpdateOperandRegClass(CmpInstr);
1537 (void)succeeded;
1538 assert(succeeded && "Some operands reg class are incompatible!");
1539 return true;
1540 }
1541
1542 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1543 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1544 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1545
1546 if (SrcReg2 != 0)
1547 return false;
1548
1549 // CmpInstr is a Compare instruction if destination register is not used.
1550 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1551 return false;
1552
1553 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1554 return true;
1555 return (CmpValue == 0 || CmpValue == 1) &&
1556 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1557}
1558
1559/// Get the opcode of the S version of Instr.
1560/// If Instr is already the S version, its opcode is returned.
1561/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
1562/// version or we are not interested in it.
1563static unsigned sForm(MachineInstr &Instr) {
1564 switch (Instr.getOpcode()) {
1565 default:
1566 return AArch64::INSTRUCTION_LIST_END;
1567
1568 case AArch64::ADDSWrr:
1569 case AArch64::ADDSWri:
1570 case AArch64::ADDSXrr:
1571 case AArch64::ADDSXri:
1572 case AArch64::SUBSWrr:
1573 case AArch64::SUBSWri:
1574 case AArch64::SUBSXrr:
1575 case AArch64::SUBSXri:
1576 return Instr.getOpcode();
1577
1578 case AArch64::ADDWrr:
1579 return AArch64::ADDSWrr;
1580 case AArch64::ADDWri:
1581 return AArch64::ADDSWri;
1582 case AArch64::ADDXrr:
1583 return AArch64::ADDSXrr;
1584 case AArch64::ADDXri:
1585 return AArch64::ADDSXri;
1586 case AArch64::ADCWr:
1587 return AArch64::ADCSWr;
1588 case AArch64::ADCXr:
1589 return AArch64::ADCSXr;
1590 case AArch64::SUBWrr:
1591 return AArch64::SUBSWrr;
1592 case AArch64::SUBWri:
1593 return AArch64::SUBSWri;
1594 case AArch64::SUBXrr:
1595 return AArch64::SUBSXrr;
1596 case AArch64::SUBXri:
1597 return AArch64::SUBSXri;
1598 case AArch64::SBCWr:
1599 return AArch64::SBCSWr;
1600 case AArch64::SBCXr:
1601 return AArch64::SBCSXr;
1602 case AArch64::ANDWri:
1603 return AArch64::ANDSWri;
1604 case AArch64::ANDXri:
1605 return AArch64::ANDSXri;
1606 }
1607}
1608
1609/// Check if AArch64::NZCV should be alive in successors of MBB.
1610static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1611 for (auto *BB : MBB->successors())
1612 if (BB->isLiveIn(AArch64::NZCV))
1613 return true;
1614 return false;
1615}
1616
1617/// \returns The condition code operand index for \p Instr if it is a branch
1618/// or select and -1 otherwise.
1619static int
1620findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1621 switch (Instr.getOpcode()) {
1622 default:
1623 return -1;
1624
1625 case AArch64::Bcc: {
1626 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1627 assert(Idx >= 2);
1628 return Idx - 2;
1629 }
1630
1631 case AArch64::CSINVWr:
1632 case AArch64::CSINVXr:
1633 case AArch64::CSINCWr:
1634 case AArch64::CSINCXr:
1635 case AArch64::CSELWr:
1636 case AArch64::CSELXr:
1637 case AArch64::CSNEGWr:
1638 case AArch64::CSNEGXr:
1639 case AArch64::FCSELSrrr:
1640 case AArch64::FCSELDrrr: {
1641 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1642 assert(Idx >= 1);
1643 return Idx - 1;
1644 }
1645 }
1646}
1647
1648/// Find a condition code used by the instruction.
1649/// Returns AArch64CC::Invalid if either the instruction does not use condition
1650/// codes or we don't optimize CmpInstr in the presence of such instructions.
1651static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1652 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1653 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1654 Instr.getOperand(CCIdx).getImm())
1655 : AArch64CC::Invalid;
1656}
1657
1660 UsedNZCV UsedFlags;
1661 switch (CC) {
1662 default:
1663 break;
1664
1665 case AArch64CC::EQ: // Z set
1666 case AArch64CC::NE: // Z clear
1667 UsedFlags.Z = true;
1668 break;
1669
1670 case AArch64CC::HI: // Z clear and C set
1671 case AArch64CC::LS: // Z set or C clear
1672 UsedFlags.Z = true;
1673 [[fallthrough]];
1674 case AArch64CC::HS: // C set
1675 case AArch64CC::LO: // C clear
1676 UsedFlags.C = true;
1677 break;
1678
1679 case AArch64CC::MI: // N set
1680 case AArch64CC::PL: // N clear
1681 UsedFlags.N = true;
1682 break;
1683
1684 case AArch64CC::VS: // V set
1685 case AArch64CC::VC: // V clear
1686 UsedFlags.V = true;
1687 break;
1688
1689 case AArch64CC::GT: // Z clear, N and V the same
1690 case AArch64CC::LE: // Z set, N and V differ
1691 UsedFlags.Z = true;
1692 [[fallthrough]];
1693 case AArch64CC::GE: // N and V the same
1694 case AArch64CC::LT: // N and V differ
1695 UsedFlags.N = true;
1696 UsedFlags.V = true;
1697 break;
1698 }
1699 return UsedFlags;
1700}
1701
1702/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
1703/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1704/// \returns std::nullopt otherwise.
1705///
1706/// Collect instructions using that flags in \p CCUseInstrs if provided.
1707std::optional<UsedNZCV>
1708llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1709 const TargetRegisterInfo &TRI,
1710 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1711 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1712 if (MI.getParent() != CmpParent)
1713 return std::nullopt;
1714
1715 if (areCFlagsAliveInSuccessors(CmpParent))
1716 return std::nullopt;
1717
1718 UsedNZCV NZCVUsedAfterCmp;
1719 for (MachineInstr &Instr : instructionsWithoutDebug(
1720 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1721 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1722 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1723 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1724 return std::nullopt;
1725 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1726 if (CCUseInstrs)
1727 CCUseInstrs->push_back(&Instr);
1728 }
1729 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1730 break;
1731 }
1732 return NZCVUsedAfterCmp;
1733}
1734
1735static bool isADDSRegImm(unsigned Opcode) {
1736 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1737}
1738
1739static bool isSUBSRegImm(unsigned Opcode) {
1740 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1741}
1742
1743/// Check if CmpInstr can be substituted by MI.
1744///
1745/// CmpInstr can be substituted:
1746/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1747/// - and, MI and CmpInstr are from the same MachineBB
1748/// - and, condition flags are not alive in successors of the CmpInstr parent
1749/// - and, if MI opcode is the S form there must be no defs of flags between
1750/// MI and CmpInstr
1751/// or if MI opcode is not the S form there must be neither defs of flags
1752/// nor uses of flags between MI and CmpInstr.
1753/// - and, if C/V flags are not used after CmpInstr
1754/// or if N flag is used but MI produces poison value if signed overflow
1755/// occurs.
1756static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1757 const TargetRegisterInfo &TRI) {
1758 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1759 // that may or may not set flags.
1760 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1761
1762 const unsigned CmpOpcode = CmpInstr.getOpcode();
1763 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1764 return false;
1765
1766 assert((CmpInstr.getOperand(2).isImm() &&
1767 CmpInstr.getOperand(2).getImm() == 0) &&
1768 "Caller guarantees that CmpInstr compares with constant 0");
1769
1770 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1771 if (!NZVCUsed || NZVCUsed->C)
1772 return false;
1773
1774 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1775 // '%vreg = add ...' or '%vreg = sub ...'.
1776 // Condition flag V is used to indicate signed overflow.
1777 // 1) MI and CmpInstr set N and V to the same value.
1778 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1779 // signed overflow occurs, so CmpInstr could still be simplified away.
1780 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1781 return false;
1782
1783 AccessKind AccessToCheck = AK_Write;
1784 if (sForm(MI) != MI.getOpcode())
1785 AccessToCheck = AK_All;
1786 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1787}
1788
1789/// Substitute an instruction comparing to zero with another instruction
1790/// which produces needed condition flags.
1791///
1792/// Return true on success.
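///
/// For example,
/// \code
///   add w8, w1, w2
///   subs wzr, w8, #0
///   b.eq <bb>
/// \endcode
/// becomes
/// \code
///   adds w8, w1, w2
///   b.eq <bb>
/// \endcode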
1793bool AArch64InstrInfo::substituteCmpToZero(
1794 MachineInstr &CmpInstr, unsigned SrcReg,
1795 const MachineRegisterInfo &MRI) const {
1796 // Get the unique definition of SrcReg.
1797 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1798 if (!MI)
1799 return false;
1800
1801 const TargetRegisterInfo &TRI = getRegisterInfo();
1802
1803 unsigned NewOpc = sForm(*MI);
1804 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1805 return false;
1806
1807 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1808 return false;
1809
1810 // Update the instruction to set NZCV.
1811 MI->setDesc(get(NewOpc));
1812 CmpInstr.eraseFromParent();
1813 bool succeeded = UpdateOperandRegClass(*MI);
1814 (void)succeeded;
1815 assert(succeeded && "Some operands reg class are incompatible!");
1816 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1817 return true;
1818}
1819
1820/// \returns True if \p CmpInstr can be removed.
1821///
1822/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1823/// codes used in \p CCUseInstrs must be inverted.
1824static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1825 int CmpValue, const TargetRegisterInfo &TRI,
1826 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1827 bool &IsInvertCC) {
1828 assert((CmpValue == 0 || CmpValue == 1) &&
1829 "Only comparisons to 0 or 1 considered for removal!");
1830
1831 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1832 unsigned MIOpc = MI.getOpcode();
1833 if (MIOpc == AArch64::CSINCWr) {
1834 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1835 MI.getOperand(2).getReg() != AArch64::WZR)
1836 return false;
1837 } else if (MIOpc == AArch64::CSINCXr) {
1838 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1839 MI.getOperand(2).getReg() != AArch64::XZR)
1840 return false;
1841 } else {
1842 return false;
1843 }
1844  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1845  if (MICC == AArch64CC::Invalid)
1846 return false;
1847
1848 // NZCV needs to be defined
1849 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
1850 return false;
1851
1852 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1853 const unsigned CmpOpcode = CmpInstr.getOpcode();
1854 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1855 if (CmpValue && !IsSubsRegImm)
1856 return false;
1857 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1858 return false;
1859
1860 // MI conditions allowed: eq, ne, mi, pl
1861 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1862 if (MIUsedNZCV.C || MIUsedNZCV.V)
1863 return false;
1864
1865 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1866 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1867  // Condition flags are not used in CmpInstr basic block successors, and only
1868  // the Z or N flag is allowed to be used after CmpInstr within its basic block.
1869 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1870 return false;
1871 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1872 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1873 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1874 return false;
1875  // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne.
1876 if (MIUsedNZCV.N && !CmpValue)
1877 return false;
1878
1879 // There must be no defs of flags between MI and CmpInstr
1880 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1881 return false;
1882
1883 // Condition code is inverted in the following cases:
1884 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1885 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1886 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1887 (!CmpValue && MICC == AArch64CC::NE);
1888 return true;
1889}
1890
1891/// Remove comparison in csinc-cmp sequence
1892///
1893/// Examples:
1894/// 1. \code
1895/// csinc w9, wzr, wzr, ne
1896/// cmp w9, #0
1897/// b.eq
1898/// \endcode
1899/// to
1900/// \code
1901/// csinc w9, wzr, wzr, ne
1902/// b.ne
1903/// \endcode
1904///
1905/// 2. \code
1906/// csinc x2, xzr, xzr, mi
1907/// cmp x2, #1
1908/// b.pl
1909/// \endcode
1910/// to
1911/// \code
1912/// csinc x2, xzr, xzr, mi
1913/// b.pl
1914/// \endcode
1915///
1916/// \param CmpInstr comparison instruction
1917/// \return True when comparison removed
1918bool AArch64InstrInfo::removeCmpToZeroOrOne(
1919 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1920 const MachineRegisterInfo &MRI) const {
1921 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1922 if (!MI)
1923    return false;
1924  const TargetRegisterInfo &TRI = getRegisterInfo();
1925  SmallVector<MachineInstr *, 4> CCUseInstrs;
1926  bool IsInvertCC = false;
1927 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1928 IsInvertCC))
1929 return false;
1930 // Make transformation
1931 CmpInstr.eraseFromParent();
1932 if (IsInvertCC) {
1933 // Invert condition codes in CmpInstr CC users
1934 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1935      int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1936      assert(Idx >= 0 && "Unexpected instruction using CC.");
1937      MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1938      AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1939          static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1940 CCOperand.setImm(CCUse);
1941 }
1942 }
1943 return true;
1944}
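// Illustrative example (not from the upstream source) of the inverted-CC case
// handled above, using hypothetical registers:
//   csinc w9, wzr, wzr, eq   ; w9 = eq ? 0 : 1
//   cmp   w9, #1
//   b.eq  <target>           ; taken when w9 == 1, i.e. when eq did not hold
// becomes
//   csinc w9, wzr, wzr, eq
//   b.ne  <target>           ; condition code in the CC user is inverted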
1945
1946bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1947  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1948 MI.getOpcode() != AArch64::CATCHRET)
1949 return false;
1950
1951 MachineBasicBlock &MBB = *MI.getParent();
1952 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1953 auto TRI = Subtarget.getRegisterInfo();
1954 DebugLoc DL = MI.getDebugLoc();
1955
1956 if (MI.getOpcode() == AArch64::CATCHRET) {
1957 // Skip to the first instruction before the epilog.
1958 const TargetInstrInfo *TII =
1960 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1962 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1963 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1964 FirstEpilogSEH != MBB.begin())
1965 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1966 if (FirstEpilogSEH != MBB.begin())
1967 FirstEpilogSEH = std::next(FirstEpilogSEH);
1968 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1969 .addReg(AArch64::X0, RegState::Define)
1970 .addMBB(TargetMBB);
1971 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1972 .addReg(AArch64::X0, RegState::Define)
1973 .addReg(AArch64::X0)
1974 .addMBB(TargetMBB)
1975 .addImm(0);
1976 return true;
1977 }
1978
1979 Register Reg = MI.getOperand(0).getReg();
1980  Module &M = *MBB.getParent()->getFunction().getParent();
1981  if (M.getStackProtectorGuard() == "sysreg") {
1982 const AArch64SysReg::SysReg *SrcReg =
1983 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
1984 if (!SrcReg)
1985 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
1986
1987 // mrs xN, sysreg
1988 BuildMI(MBB, MI, DL, get(AArch64::MRS))
1989        .addDef(Reg)
1990        .addImm(SrcReg->Encoding);
1991 int Offset = M.getStackProtectorGuardOffset();
1992 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
1993 // ldr xN, [xN, #offset]
1994 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1995 .addDef(Reg)
1996 .addUse(Reg, RegState::Kill)
1997 .addImm(Offset / 8);
1998 } else if (Offset >= -256 && Offset <= 255) {
1999 // ldur xN, [xN, #offset]
2000 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2001 .addDef(Reg)
2002 .addUse(Reg, RegState::Kill)
2003 .addImm(Offset);
2004 } else if (Offset >= -4095 && Offset <= 4095) {
2005 if (Offset > 0) {
2006 // add xN, xN, #offset
2007 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2008 .addDef(Reg)
2009 .addUse(Reg, RegState::Kill)
2010 .addImm(Offset)
2011 .addImm(0);
2012 } else {
2013 // sub xN, xN, #offset
2014 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2015 .addDef(Reg)
2016 .addUse(Reg, RegState::Kill)
2017 .addImm(-Offset)
2018 .addImm(0);
2019 }
2020 // ldr xN, [xN]
2021 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2022 .addDef(Reg)
2023 .addUse(Reg, RegState::Kill)
2024 .addImm(0);
2025 } else {
2026      // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2027      // than 32760.
2028 // It might be nice to use AArch64::MOVi32imm here, which would get
2029 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2030 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2031 // AArch64FrameLowering might help us find such a scratch register
2032 // though. If we failed to find a scratch register, we could emit a
2033 // stream of add instructions to build up the immediate. Or, we could try
2034      // to insert an AArch64::MOVi32imm before register allocation so that we
2035 // didn't need to scavenge for a scratch register.
2036 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2037 }
2038 MBB.erase(MI);
2039 return true;
2040 }
2041
2042 const GlobalValue *GV =
2043 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2044 const TargetMachine &TM = MBB.getParent()->getTarget();
2045 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2046 const unsigned char MO_NC = AArch64II::MO_NC;
2047
2048 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2049 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2050 .addGlobalAddress(GV, 0, OpFlags);
2051 if (Subtarget.isTargetILP32()) {
2052 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2053 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2054 .addDef(Reg32, RegState::Dead)
2055 .addUse(Reg, RegState::Kill)
2056 .addImm(0)
2057 .addMemOperand(*MI.memoperands_begin())
2059 } else {
2060 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2061 .addReg(Reg, RegState::Kill)
2062 .addImm(0)
2063 .addMemOperand(*MI.memoperands_begin());
2064 }
2065 } else if (TM.getCodeModel() == CodeModel::Large) {
2066 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2067 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2068 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2069 .addImm(0);
2070 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2071 .addReg(Reg, RegState::Kill)
2072 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2073 .addImm(16);
2074 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2075 .addReg(Reg, RegState::Kill)
2076 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2077 .addImm(32);
2078 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2079 .addReg(Reg, RegState::Kill)
2080      .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2081      .addImm(48);
2082 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2083 .addReg(Reg, RegState::Kill)
2084 .addImm(0)
2085 .addMemOperand(*MI.memoperands_begin());
2086 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2087 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2088 .addGlobalAddress(GV, 0, OpFlags);
2089 } else {
2090 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2091 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2092 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2093 if (Subtarget.isTargetILP32()) {
2094 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2095 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2096 .addDef(Reg32, RegState::Dead)
2097 .addUse(Reg, RegState::Kill)
2098 .addGlobalAddress(GV, 0, LoFlags)
2099 .addMemOperand(*MI.memoperands_begin())
2101 } else {
2102 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2103 .addReg(Reg, RegState::Kill)
2104 .addGlobalAddress(GV, 0, LoFlags)
2105 .addMemOperand(*MI.memoperands_begin());
2106 }
2107 }
2108
2109 MBB.erase(MI);
2110
2111 return true;
2112}
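// Illustrative summary (not from the upstream source) of the "sysreg" stack
// protector guard expansion above, for a guard at offset Off from the value
// read with MRS:
//   0 <= Off <= 32760 && (Off % 8) == 0 :  mrs xN, <sysreg>; ldr  xN, [xN, #Off]
//   -256 <= Off <= 255                  :  mrs xN, <sysreg>; ldur xN, [xN, #Off]
//   -4095 <= Off <= 4095 (other cases)  :  mrs xN, <sysreg>; add/sub xN, xN, #|Off|; ldr xN, [xN]
//   anything else                       :  report_fatal_error(...)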
2113
2114// Return true if this instruction simply sets its single destination register
2115// to zero. This is equivalent to a register rename of the zero-register.
2116bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2117  switch (MI.getOpcode()) {
2118 default:
2119 break;
2120 case AArch64::MOVZWi:
2121 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2122 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2123 assert(MI.getDesc().getNumOperands() == 3 &&
2124 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2125 return true;
2126 }
2127 break;
2128 case AArch64::ANDWri: // and Rd, Rzr, #imm
2129 return MI.getOperand(1).getReg() == AArch64::WZR;
2130 case AArch64::ANDXri:
2131 return MI.getOperand(1).getReg() == AArch64::XZR;
2132 case TargetOpcode::COPY:
2133 return MI.getOperand(1).getReg() == AArch64::WZR;
2134 }
2135 return false;
2136}
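// Illustrative examples (not from the upstream source) of instructions the
// function above recognizes as zero materializations:
//   %0:gpr32 = MOVZWi 0, 0               ; movz w0, #0
//   %1:gpr64 = ANDXri $xzr, <logical imm> ; and x1, xzr, #imm (always zero)
//   %2:gpr32 = COPY $wzr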
2137
2138// Return true if this instruction simply renames a general register without
2139// modifying bits.
2140bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2141  switch (MI.getOpcode()) {
2142 default:
2143 break;
2144 case TargetOpcode::COPY: {
2145    // GPR32 copies will be lowered to ORRXrs
2146 Register DstReg = MI.getOperand(0).getReg();
2147 return (AArch64::GPR32RegClass.contains(DstReg) ||
2148 AArch64::GPR64RegClass.contains(DstReg));
2149 }
2150 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2151 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2152 assert(MI.getDesc().getNumOperands() == 4 &&
2153 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2154 return true;
2155 }
2156 break;
2157 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2158 if (MI.getOperand(2).getImm() == 0) {
2159 assert(MI.getDesc().getNumOperands() == 4 &&
2160 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2161 return true;
2162 }
2163 break;
2164 }
2165 return false;
2166}
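// Illustrative examples (not from the upstream source) of GPR-to-GPR renames
// the function above recognizes:
//   %0:gpr64 = ORRXrs $xzr, %1:gpr64, 0   ; orr x0, xzr, x1, lsl #0
//   %0:gpr64 = ADDXri %1:gpr64, 0, 0      ; add x0, x1, #0
//   %0:gpr32 = COPY %1:gpr32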
2167
2168// Return true if this instruction simply renames an FP register without
2169// modifying bits.
2170bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2171  switch (MI.getOpcode()) {
2172 default:
2173 break;
2174 case TargetOpcode::COPY: {
2175 Register DstReg = MI.getOperand(0).getReg();
2176 return AArch64::FPR128RegClass.contains(DstReg);
2177 }
2178 case AArch64::ORRv16i8:
2179 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2180 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2181 "invalid ORRv16i8 operands");
2182 return true;
2183 }
2184 break;
2185 }
2186 return false;
2187}
2188
2190 int &FrameIndex) const {
2191 switch (MI.getOpcode()) {
2192 default:
2193 break;
2194 case AArch64::LDRWui:
2195 case AArch64::LDRXui:
2196 case AArch64::LDRBui:
2197 case AArch64::LDRHui:
2198 case AArch64::LDRSui:
2199 case AArch64::LDRDui:
2200 case AArch64::LDRQui:
2201 case AArch64::LDR_PXI:
2202 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2203 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2204 FrameIndex = MI.getOperand(1).getIndex();
2205 return MI.getOperand(0).getReg();
2206 }
2207 break;
2208 }
2209
2210 return 0;
2211}
2212
2214 int &FrameIndex) const {
2215 switch (MI.getOpcode()) {
2216 default:
2217 break;
2218 case AArch64::STRWui:
2219 case AArch64::STRXui:
2220 case AArch64::STRBui:
2221 case AArch64::STRHui:
2222 case AArch64::STRSui:
2223 case AArch64::STRDui:
2224 case AArch64::STRQui:
2225 case AArch64::STR_PXI:
2226 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2227 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2228 FrameIndex = MI.getOperand(1).getIndex();
2229 return MI.getOperand(0).getReg();
2230 }
2231 break;
2232 }
2233 return 0;
2234}
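// Illustrative example (not from the upstream source): for a spill reload such
// as "%0:gpr64 = LDRXui %stack.3, 0", isLoadFromStackSlot above returns %0's
// register and sets FrameIndex = 3; a non-zero immediate, a subregister def,
// or a non-frame-index base makes it return 0. isStoreToStackSlot mirrors this
// for the STR forms.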
2235
2236/// Check all MachineMemOperands for a hint to suppress pairing.
2238 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2239 return MMO->getFlags() & MOSuppressPair;
2240 });
2241}
2242
2243/// Set a flag on the first MachineMemOperand to suppress pairing.
2245 if (MI.memoperands_empty())
2246 return;
2247 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2248}
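// Usage sketch (not from the upstream source; shouldWeSuppressPairing() is a
// hypothetical predicate): a pass that wants to keep a memory access out of
// the load/store pair optimizer can mark it explicitly:
//   if (AArch64InstrInfo::isPairableLdStInst(MI) && shouldWeSuppressPairing(MI))
//     AArch64InstrInfo::suppressLdStPair(MI);
// isLdStPairSuppressed(MI) then returns true for that instruction.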
2249
2250/// Check all MachineMemOperands for a hint that the load/store is strided.
2252 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2253 return MMO->getFlags() & MOStridedAccess;
2254 });
2255}
2256
2258 switch (Opc) {
2259 default:
2260 return false;
2261 case AArch64::STURSi:
2262 case AArch64::STRSpre:
2263 case AArch64::STURDi:
2264 case AArch64::STRDpre:
2265 case AArch64::STURQi:
2266 case AArch64::STRQpre:
2267 case AArch64::STURBBi:
2268 case AArch64::STURHHi:
2269 case AArch64::STURWi:
2270 case AArch64::STRWpre:
2271 case AArch64::STURXi:
2272 case AArch64::STRXpre:
2273 case AArch64::LDURSi:
2274 case AArch64::LDRSpre:
2275 case AArch64::LDURDi:
2276 case AArch64::LDRDpre:
2277 case AArch64::LDURQi:
2278 case AArch64::LDRQpre:
2279 case AArch64::LDURWi:
2280 case AArch64::LDRWpre:
2281 case AArch64::LDURXi:
2282 case AArch64::LDRXpre:
2283 case AArch64::LDRSWpre:
2284 case AArch64::LDURSWi:
2285 case AArch64::LDURHHi:
2286 case AArch64::LDURBBi:
2287 case AArch64::LDURSBWi:
2288 case AArch64::LDURSHWi:
2289 return true;
2290 }
2291}
2292
2293std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2294 switch (Opc) {
2295 default: return {};
2296 case AArch64::PRFMui: return AArch64::PRFUMi;
2297 case AArch64::LDRXui: return AArch64::LDURXi;
2298 case AArch64::LDRWui: return AArch64::LDURWi;
2299 case AArch64::LDRBui: return AArch64::LDURBi;
2300 case AArch64::LDRHui: return AArch64::LDURHi;
2301 case AArch64::LDRSui: return AArch64::LDURSi;
2302 case AArch64::LDRDui: return AArch64::LDURDi;
2303 case AArch64::LDRQui: return AArch64::LDURQi;
2304 case AArch64::LDRBBui: return AArch64::LDURBBi;
2305 case AArch64::LDRHHui: return AArch64::LDURHHi;
2306 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2307 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2308 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2309 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2310 case AArch64::LDRSWui: return AArch64::LDURSWi;
2311 case AArch64::STRXui: return AArch64::STURXi;
2312 case AArch64::STRWui: return AArch64::STURWi;
2313 case AArch64::STRBui: return AArch64::STURBi;
2314 case AArch64::STRHui: return AArch64::STURHi;
2315 case AArch64::STRSui: return AArch64::STURSi;
2316 case AArch64::STRDui: return AArch64::STURDi;
2317 case AArch64::STRQui: return AArch64::STURQi;
2318 case AArch64::STRBBui: return AArch64::STURBBi;
2319 case AArch64::STRHHui: return AArch64::STURHHi;
2320 }
2321}
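// Example (illustrative, not from the upstream source):
//   std::optional<unsigned> U = AArch64InstrInfo::getUnscaledLdSt(AArch64::LDRXui);
//   // U contains AArch64::LDURXi.
//   // For opcodes without an unscaled form the function returns std::nullopt.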
2322
2324 switch (Opc) {
2325 default:
2326 return 2;
2327 case AArch64::LDPXi:
2328 case AArch64::LDPDi:
2329 case AArch64::STPXi:
2330 case AArch64::STPDi:
2331 case AArch64::LDNPXi:
2332 case AArch64::LDNPDi:
2333 case AArch64::STNPXi:
2334 case AArch64::STNPDi:
2335 case AArch64::LDPQi:
2336 case AArch64::STPQi:
2337 case AArch64::LDNPQi:
2338 case AArch64::STNPQi:
2339 case AArch64::LDPWi:
2340 case AArch64::LDPSi:
2341 case AArch64::STPWi:
2342 case AArch64::STPSi:
2343 case AArch64::LDNPWi:
2344 case AArch64::LDNPSi:
2345 case AArch64::STNPWi:
2346 case AArch64::STNPSi:
2347 case AArch64::LDG:
2348 case AArch64::STGPi:
2349
2350 case AArch64::LD1B_IMM:
2351 case AArch64::LD1B_H_IMM:
2352 case AArch64::LD1B_S_IMM:
2353 case AArch64::LD1B_D_IMM:
2354 case AArch64::LD1SB_H_IMM:
2355 case AArch64::LD1SB_S_IMM:
2356 case AArch64::LD1SB_D_IMM:
2357 case AArch64::LD1H_IMM:
2358 case AArch64::LD1H_S_IMM:
2359 case AArch64::LD1H_D_IMM:
2360 case AArch64::LD1SH_S_IMM:
2361 case AArch64::LD1SH_D_IMM:
2362 case AArch64::LD1W_IMM:
2363 case AArch64::LD1W_D_IMM:
2364 case AArch64::LD1SW_D_IMM:
2365 case AArch64::LD1D_IMM:
2366
2367 case AArch64::LD2B_IMM:
2368 case AArch64::LD2H_IMM:
2369 case AArch64::LD2W_IMM:
2370 case AArch64::LD2D_IMM:
2371 case AArch64::LD3B_IMM:
2372 case AArch64::LD3H_IMM:
2373 case AArch64::LD3W_IMM:
2374 case AArch64::LD3D_IMM:
2375 case AArch64::LD4B_IMM:
2376 case AArch64::LD4H_IMM:
2377 case AArch64::LD4W_IMM:
2378 case AArch64::LD4D_IMM:
2379
2380 case AArch64::ST1B_IMM:
2381 case AArch64::ST1B_H_IMM:
2382 case AArch64::ST1B_S_IMM:
2383 case AArch64::ST1B_D_IMM:
2384 case AArch64::ST1H_IMM:
2385 case AArch64::ST1H_S_IMM:
2386 case AArch64::ST1H_D_IMM:
2387 case AArch64::ST1W_IMM:
2388 case AArch64::ST1W_D_IMM:
2389 case AArch64::ST1D_IMM:
2390
2391 case AArch64::ST2B_IMM:
2392 case AArch64::ST2H_IMM:
2393 case AArch64::ST2W_IMM:
2394 case AArch64::ST2D_IMM:
2395 case AArch64::ST3B_IMM:
2396 case AArch64::ST3H_IMM:
2397 case AArch64::ST3W_IMM:
2398 case AArch64::ST3D_IMM:
2399 case AArch64::ST4B_IMM:
2400 case AArch64::ST4H_IMM:
2401 case AArch64::ST4W_IMM:
2402 case AArch64::ST4D_IMM:
2403
2404 case AArch64::LD1RB_IMM:
2405 case AArch64::LD1RB_H_IMM:
2406 case AArch64::LD1RB_S_IMM:
2407 case AArch64::LD1RB_D_IMM:
2408 case AArch64::LD1RSB_H_IMM:
2409 case AArch64::LD1RSB_S_IMM:
2410 case AArch64::LD1RSB_D_IMM:
2411 case AArch64::LD1RH_IMM:
2412 case AArch64::LD1RH_S_IMM:
2413 case AArch64::LD1RH_D_IMM:
2414 case AArch64::LD1RSH_S_IMM:
2415 case AArch64::LD1RSH_D_IMM:
2416 case AArch64::LD1RW_IMM:
2417 case AArch64::LD1RW_D_IMM:
2418 case AArch64::LD1RSW_IMM:
2419 case AArch64::LD1RD_IMM:
2420
2421 case AArch64::LDNT1B_ZRI:
2422 case AArch64::LDNT1H_ZRI:
2423 case AArch64::LDNT1W_ZRI:
2424 case AArch64::LDNT1D_ZRI:
2425 case AArch64::STNT1B_ZRI:
2426 case AArch64::STNT1H_ZRI:
2427 case AArch64::STNT1W_ZRI:
2428 case AArch64::STNT1D_ZRI:
2429
2430 case AArch64::LDNF1B_IMM:
2431 case AArch64::LDNF1B_H_IMM:
2432 case AArch64::LDNF1B_S_IMM:
2433 case AArch64::LDNF1B_D_IMM:
2434 case AArch64::LDNF1SB_H_IMM:
2435 case AArch64::LDNF1SB_S_IMM:
2436 case AArch64::LDNF1SB_D_IMM:
2437 case AArch64::LDNF1H_IMM:
2438 case AArch64::LDNF1H_S_IMM:
2439 case AArch64::LDNF1H_D_IMM:
2440 case AArch64::LDNF1SH_S_IMM:
2441 case AArch64::LDNF1SH_D_IMM:
2442 case AArch64::LDNF1W_IMM:
2443 case AArch64::LDNF1W_D_IMM:
2444 case AArch64::LDNF1SW_D_IMM:
2445 case AArch64::LDNF1D_IMM:
2446 return 3;
2447 case AArch64::ADDG:
2448 case AArch64::STGi:
2449 case AArch64::LDR_PXI:
2450 case AArch64::STR_PXI:
2451 return 2;
2452 }
2453}
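// Illustrative note (not from the upstream source): the function above returns
// the operand index of the immediate offset. For a plain form such as LDRXui
// (ldr x1, [x0, #8]) that is operand 2 (the default), while for a paired form
// such as LDPXi (ldp x0, x1, [x2, #16]) it is operand 3.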
2454
2456 switch (MI.getOpcode()) {
2457 default:
2458 return false;
2459 // Scaled instructions.
2460 case AArch64::STRSui:
2461 case AArch64::STRDui:
2462 case AArch64::STRQui:
2463 case AArch64::STRXui:
2464 case AArch64::STRWui:
2465 case AArch64::LDRSui:
2466 case AArch64::LDRDui:
2467 case AArch64::LDRQui:
2468 case AArch64::LDRXui:
2469 case AArch64::LDRWui:
2470 case AArch64::LDRSWui:
2471 // Unscaled instructions.
2472 case AArch64::STURSi:
2473 case AArch64::STRSpre:
2474 case AArch64::STURDi:
2475 case AArch64::STRDpre:
2476 case AArch64::STURQi:
2477 case AArch64::STRQpre:
2478 case AArch64::STURWi:
2479 case AArch64::STRWpre:
2480 case AArch64::STURXi:
2481 case AArch64::STRXpre:
2482 case AArch64::LDURSi:
2483 case AArch64::LDRSpre:
2484 case AArch64::LDURDi:
2485 case AArch64::LDRDpre:
2486 case AArch64::LDURQi:
2487 case AArch64::LDRQpre:
2488 case AArch64::LDURWi:
2489 case AArch64::LDRWpre:
2490 case AArch64::LDURXi:
2491 case AArch64::LDRXpre:
2492 case AArch64::LDURSWi:
2493 case AArch64::LDRSWpre:
2494 return true;
2495 }
2496}
2497
2499 switch (MI.getOpcode()) {
2500 default:
2501 assert((!MI.isCall() || !MI.isReturn()) &&
2502 "Unexpected instruction - was a new tail call opcode introduced?");
2503 return false;
2504 case AArch64::TCRETURNdi:
2505 case AArch64::TCRETURNri:
2506 case AArch64::TCRETURNrix16x17:
2507 case AArch64::TCRETURNrix17:
2508 case AArch64::TCRETURNrinotx16:
2509 case AArch64::TCRETURNriALL:
2510 return true;
2511 }
2512}
2513
2515 switch (Opc) {
2516 default:
2517 llvm_unreachable("Opcode has no flag setting equivalent!");
2518 // 32-bit cases:
2519 case AArch64::ADDWri:
2520 return AArch64::ADDSWri;
2521 case AArch64::ADDWrr:
2522 return AArch64::ADDSWrr;
2523 case AArch64::ADDWrs:
2524 return AArch64::ADDSWrs;
2525 case AArch64::ADDWrx:
2526 return AArch64::ADDSWrx;
2527 case AArch64::ANDWri:
2528 return AArch64::ANDSWri;
2529 case AArch64::ANDWrr:
2530 return AArch64::ANDSWrr;
2531 case AArch64::ANDWrs:
2532 return AArch64::ANDSWrs;
2533 case AArch64::BICWrr:
2534 return AArch64::BICSWrr;
2535 case AArch64::BICWrs:
2536 return AArch64::BICSWrs;
2537 case AArch64::SUBWri:
2538 return AArch64::SUBSWri;
2539 case AArch64::SUBWrr:
2540 return AArch64::SUBSWrr;
2541 case AArch64::SUBWrs:
2542 return AArch64::SUBSWrs;
2543 case AArch64::SUBWrx:
2544 return AArch64::SUBSWrx;
2545 // 64-bit cases:
2546 case AArch64::ADDXri:
2547 return AArch64::ADDSXri;
2548 case AArch64::ADDXrr:
2549 return AArch64::ADDSXrr;
2550 case AArch64::ADDXrs:
2551 return AArch64::ADDSXrs;
2552 case AArch64::ADDXrx:
2553 return AArch64::ADDSXrx;
2554 case AArch64::ANDXri:
2555 return AArch64::ANDSXri;
2556 case AArch64::ANDXrr:
2557 return AArch64::ANDSXrr;
2558 case AArch64::ANDXrs:
2559 return AArch64::ANDSXrs;
2560 case AArch64::BICXrr:
2561 return AArch64::BICSXrr;
2562 case AArch64::BICXrs:
2563 return AArch64::BICSXrs;
2564 case AArch64::SUBXri:
2565 return AArch64::SUBSXri;
2566 case AArch64::SUBXrr:
2567 return AArch64::SUBSXrr;
2568 case AArch64::SUBXrs:
2569 return AArch64::SUBSXrs;
2570 case AArch64::SUBXrx:
2571 return AArch64::SUBSXrx;
2572 // SVE instructions:
2573 case AArch64::AND_PPzPP:
2574 return AArch64::ANDS_PPzPP;
2575 case AArch64::BIC_PPzPP:
2576 return AArch64::BICS_PPzPP;
2577 case AArch64::EOR_PPzPP:
2578 return AArch64::EORS_PPzPP;
2579 case AArch64::NAND_PPzPP:
2580 return AArch64::NANDS_PPzPP;
2581 case AArch64::NOR_PPzPP:
2582 return AArch64::NORS_PPzPP;
2583 case AArch64::ORN_PPzPP:
2584 return AArch64::ORNS_PPzPP;
2585 case AArch64::ORR_PPzPP:
2586 return AArch64::ORRS_PPzPP;
2587 case AArch64::BRKA_PPzP:
2588 return AArch64::BRKAS_PPzP;
2589 case AArch64::BRKPA_PPzPP:
2590 return AArch64::BRKPAS_PPzPP;
2591 case AArch64::BRKB_PPzP:
2592 return AArch64::BRKBS_PPzP;
2593 case AArch64::BRKPB_PPzPP:
2594 return AArch64::BRKPBS_PPzPP;
2595 case AArch64::BRKN_PPzP:
2596 return AArch64::BRKNS_PPzP;
2597 case AArch64::RDFFR_PPz:
2598 return AArch64::RDFFRS_PPz;
2599 case AArch64::PTRUE_B:
2600 return AArch64::PTRUES_B;
2601 }
2602}
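// Example (illustrative, not from the upstream source):
//   unsigned NewOpc = AArch64InstrInfo::convertToFlagSettingOpc(AArch64::ADDWri);
//   // NewOpc == AArch64::ADDSWri; the S form additionally defines NZCV.
//   // Passing an opcode without an S form hits the llvm_unreachable above.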
2603
2604// Is this a candidate for ld/st merging or pairing? For example, we don't
2605// touch volatiles or load/stores that have a hint to avoid pair formation.
2606bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2607
2608 bool IsPreLdSt = isPreLdSt(MI);
2609
2610 // If this is a volatile load/store, don't mess with it.
2611 if (MI.hasOrderedMemoryRef())
2612 return false;
2613
2614 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2615 // For Pre-inc LD/ST, the operand is shifted by one.
2616 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2617 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2618 "Expected a reg or frame index operand.");
2619
2620 // For Pre-indexed addressing quadword instructions, the third operand is the
2621 // immediate value.
2622 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2623
2624 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2625 return false;
2626
2627 // Can't merge/pair if the instruction modifies the base register.
2628 // e.g., ldr x0, [x0]
2629 // This case will never occur with an FI base.
2630 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2631 // STR<S,D,Q,W,X>pre, it can be merged.
2632 // For example:
2633 // ldr q0, [x11, #32]!
2634 // ldr q1, [x11, #16]
2635 // to
2636 // ldp q0, q1, [x11, #32]!
2637 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2638 Register BaseReg = MI.getOperand(1).getReg();
2639    const TargetRegisterInfo *TRI = &getRegisterInfo();
2640    if (MI.modifiesRegister(BaseReg, TRI))
2641 return false;
2642 }
2643
2644 // Check if this load/store has a hint to avoid pair formation.
2645 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2646  if (isLdStPairSuppressed(MI))
2647    return false;
2648
2649 // Do not pair any callee-save store/reload instructions in the
2650 // prologue/epilogue if the CFI information encoded the operations as separate
2651 // instructions, as that will cause the size of the actual prologue to mismatch
2652 // with the prologue size recorded in the Windows CFI.
2653 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2654 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2655 MI.getMF()->getFunction().needsUnwindTableEntry();
2656  if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2657                      MI.getFlag(MachineInstr::FrameDestroy)))
2658    return false;
2659
2660 // On some CPUs quad load/store pairs are slower than two single load/stores.
2661 if (Subtarget.isPaired128Slow()) {
2662 switch (MI.getOpcode()) {
2663 default:
2664 break;
2665 case AArch64::LDURQi:
2666 case AArch64::STURQi:
2667 case AArch64::LDRQui:
2668 case AArch64::STRQui:
2669 return false;
2670 }
2671 }
2672
2673 return true;
2674}
2675
2676bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2677    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2678    int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2679 const TargetRegisterInfo *TRI) const {
2680 if (!LdSt.mayLoadOrStore())
2681 return false;
2682
2683 const MachineOperand *BaseOp;
2684 TypeSize WidthN(0, false);
2685 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2686 WidthN, TRI))
2687 return false;
2688  // The maximum vscale is 16 under AArch64, so return the maximal extent for
2689  // the vector.
2690 Width = WidthN.isScalable()
2693 : WidthN.getKnownMinValue();
2694 BaseOps.push_back(BaseOp);
2695 return true;
2696}
2697
2698std::optional<ExtAddrMode>
2699AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2700                                          const TargetRegisterInfo *TRI) const {
2701 const MachineOperand *Base; // Filled with the base operand of MI.
2702 int64_t Offset; // Filled with the offset of MI.
2703 bool OffsetIsScalable;
2704 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2705 return std::nullopt;
2706
2707 if (!Base->isReg())
2708 return std::nullopt;
2709 ExtAddrMode AM;
2710 AM.BaseReg = Base->getReg();
2711 AM.Displacement = Offset;
2712 AM.ScaledReg = 0;
2713 AM.Scale = 0;
2714 return AM;
2715}
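// Illustrative example (not from the upstream source): for "ldr x1, [x0, #16]"
// (LDRXui with immediate operand 2), the function above produces
// ExtAddrMode{BaseReg = x0, ScaledReg = 0, Scale = 0, Displacement = 16}.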
2716
2717bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2718                                           Register Reg,
2719 const MachineInstr &AddrI,
2720 ExtAddrMode &AM) const {
2721 // Filter out instructions into which we cannot fold.
2722 unsigned NumBytes;
2723 int64_t OffsetScale = 1;
2724 switch (MemI.getOpcode()) {
2725 default:
2726 return false;
2727
2728 case AArch64::LDURQi:
2729 case AArch64::STURQi:
2730 NumBytes = 16;
2731 break;
2732
2733 case AArch64::LDURDi:
2734 case AArch64::STURDi:
2735 case AArch64::LDURXi:
2736 case AArch64::STURXi:
2737 NumBytes = 8;
2738 break;
2739
2740 case AArch64::LDURWi:
2741 case AArch64::LDURSWi:
2742 case AArch64::STURWi:
2743 NumBytes = 4;
2744 break;
2745
2746 case AArch64::LDURHi:
2747 case AArch64::STURHi:
2748 case AArch64::LDURHHi:
2749 case AArch64::STURHHi:
2750 case AArch64::LDURSHXi:
2751 case AArch64::LDURSHWi:
2752 NumBytes = 2;
2753 break;
2754
2755 case AArch64::LDRBroX:
2756 case AArch64::LDRBBroX:
2757 case AArch64::LDRSBXroX:
2758 case AArch64::LDRSBWroX:
2759 case AArch64::STRBroX:
2760 case AArch64::STRBBroX:
2761 case AArch64::LDURBi:
2762 case AArch64::LDURBBi:
2763 case AArch64::LDURSBXi:
2764 case AArch64::LDURSBWi:
2765 case AArch64::STURBi:
2766 case AArch64::STURBBi:
2767 case AArch64::LDRBui:
2768 case AArch64::LDRBBui:
2769 case AArch64::LDRSBXui:
2770 case AArch64::LDRSBWui:
2771 case AArch64::STRBui:
2772 case AArch64::STRBBui:
2773 NumBytes = 1;
2774 break;
2775
2776 case AArch64::LDRQroX:
2777 case AArch64::STRQroX:
2778 case AArch64::LDRQui:
2779 case AArch64::STRQui:
2780 NumBytes = 16;
2781 OffsetScale = 16;
2782 break;
2783
2784 case AArch64::LDRDroX:
2785 case AArch64::STRDroX:
2786 case AArch64::LDRXroX:
2787 case AArch64::STRXroX:
2788 case AArch64::LDRDui:
2789 case AArch64::STRDui:
2790 case AArch64::LDRXui:
2791 case AArch64::STRXui:
2792 NumBytes = 8;
2793 OffsetScale = 8;
2794 break;
2795
2796 case AArch64::LDRWroX:
2797 case AArch64::LDRSWroX:
2798 case AArch64::STRWroX:
2799 case AArch64::LDRWui:
2800 case AArch64::LDRSWui:
2801 case AArch64::STRWui:
2802 NumBytes = 4;
2803 OffsetScale = 4;
2804 break;
2805
2806 case AArch64::LDRHroX:
2807 case AArch64::STRHroX:
2808 case AArch64::LDRHHroX:
2809 case AArch64::STRHHroX:
2810 case AArch64::LDRSHXroX:
2811 case AArch64::LDRSHWroX:
2812 case AArch64::LDRHui:
2813 case AArch64::STRHui:
2814 case AArch64::LDRHHui:
2815 case AArch64::STRHHui:
2816 case AArch64::LDRSHXui:
2817 case AArch64::LDRSHWui:
2818 NumBytes = 2;
2819 OffsetScale = 2;
2820 break;
2821 }
2822
2823 // Check the fold operand is not the loaded/stored value.
2824 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2825 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2826 return false;
2827
2828 // Handle memory instructions with a [Reg, Reg] addressing mode.
2829 if (MemI.getOperand(2).isReg()) {
2830 // Bail if the addressing mode already includes extension of the offset
2831 // register.
2832 if (MemI.getOperand(3).getImm())
2833 return false;
2834
2835 // Check if we actually have a scaled offset.
2836 if (MemI.getOperand(4).getImm() == 0)
2837 OffsetScale = 1;
2838
2839 // If the address instructions is folded into the base register, then the
2840 // addressing mode must not have a scale. Then we can swap the base and the
2841 // scaled registers.
2842 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
2843 return false;
2844
2845 switch (AddrI.getOpcode()) {
2846 default:
2847 return false;
2848
2849 case AArch64::SBFMXri:
2850 // sxtw Xa, Wm
2851 // ldr Xd, [Xn, Xa, lsl #N]
2852 // ->
2853 // ldr Xd, [Xn, Wm, sxtw #N]
2854 if (AddrI.getOperand(2).getImm() != 0 ||
2855 AddrI.getOperand(3).getImm() != 31)
2856 return false;
2857
2858 AM.BaseReg = MemI.getOperand(1).getReg();
2859 if (AM.BaseReg == Reg)
2860 AM.BaseReg = MemI.getOperand(2).getReg();
2861 AM.ScaledReg = AddrI.getOperand(1).getReg();
2862 AM.Scale = OffsetScale;
2863 AM.Displacement = 0;
2865 return true;
2866
2867 case TargetOpcode::SUBREG_TO_REG: {
2868 // mov Wa, Wm
2869 // ldr Xd, [Xn, Xa, lsl #N]
2870 // ->
2871 // ldr Xd, [Xn, Wm, uxtw #N]
2872
2873 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
2874 if (AddrI.getOperand(1).getImm() != 0 ||
2875 AddrI.getOperand(3).getImm() != AArch64::sub_32)
2876 return false;
2877
2878 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
2879 Register OffsetReg = AddrI.getOperand(2).getReg();
2880 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
2881 return false;
2882
2883 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
2884 if (DefMI.getOpcode() != AArch64::ORRWrs ||
2885 DefMI.getOperand(1).getReg() != AArch64::WZR ||
2886 DefMI.getOperand(3).getImm() != 0)
2887 return false;
2888
2889 AM.BaseReg = MemI.getOperand(1).getReg();
2890 if (AM.BaseReg == Reg)
2891 AM.BaseReg = MemI.getOperand(2).getReg();
2892 AM.ScaledReg = DefMI.getOperand(2).getReg();
2893 AM.Scale = OffsetScale;
2894 AM.Displacement = 0;
2896 return true;
2897 }
2898 }
2899 }
2900
2901 // Handle memory instructions with a [Reg, #Imm] addressing mode.
2902
2903 // Check we are not breaking a potential conversion to an LDP.
2904 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
2905 int64_t NewOffset) -> bool {
2906 int64_t MinOffset, MaxOffset;
2907 switch (NumBytes) {
2908 default:
2909 return true;
2910 case 4:
2911 MinOffset = -256;
2912 MaxOffset = 252;
2913 break;
2914 case 8:
2915 MinOffset = -512;
2916 MaxOffset = 504;
2917 break;
2918 case 16:
2919 MinOffset = -1024;
2920 MaxOffset = 1008;
2921 break;
2922 }
2923 return OldOffset < MinOffset || OldOffset > MaxOffset ||
2924 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
2925 };
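  // Illustrative note (not from the upstream source): LDP/STP take a signed
  // 7-bit immediate scaled by the access size, so for 8-byte accesses the
  // representable offsets are -64*8 = -512 .. 63*8 = 504, matching the
  // NumBytes == 8 case above.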
2926 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
2927 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
2928 int64_t NewOffset = OldOffset + Disp;
2929 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
2930 return false;
2931 // If the old offset would fit into an LDP, but the new offset wouldn't,
2932 // bail out.
2933 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
2934 return false;
2935 AM.BaseReg = AddrI.getOperand(1).getReg();
2936 AM.ScaledReg = 0;
2937 AM.Scale = 0;
2938 AM.Displacement = NewOffset;
2940 return true;
2941 };
2942
2943 auto canFoldAddRegIntoAddrMode =
2944 [&](int64_t Scale,
2946 if (MemI.getOperand(2).getImm() != 0)
2947 return false;
2948 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
2949 return false;
2950 AM.BaseReg = AddrI.getOperand(1).getReg();
2951 AM.ScaledReg = AddrI.getOperand(2).getReg();
2952 AM.Scale = Scale;
2953 AM.Displacement = 0;
2954 AM.Form = Form;
2955 return true;
2956 };
2957
2958 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
2959 unsigned Opcode = MemI.getOpcode();
2960 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
2961 Subtarget.isSTRQroSlow();
2962 };
2963
2964 int64_t Disp = 0;
2965 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
2966 switch (AddrI.getOpcode()) {
2967 default:
2968 return false;
2969
2970 case AArch64::ADDXri:
2971 // add Xa, Xn, #N
2972 // ldr Xd, [Xa, #M]
2973 // ->
2974 // ldr Xd, [Xn, #N'+M]
2975 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2976 return canFoldAddSubImmIntoAddrMode(Disp);
2977
2978 case AArch64::SUBXri:
2979 // sub Xa, Xn, #N
2980 // ldr Xd, [Xa, #M]
2981 // ->
2982 // ldr Xd, [Xn, #N'+M]
2983 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2984 return canFoldAddSubImmIntoAddrMode(-Disp);
2985
2986 case AArch64::ADDXrs: {
2987 // add Xa, Xn, Xm, lsl #N
2988 // ldr Xd, [Xa]
2989 // ->
2990 // ldr Xd, [Xn, Xm, lsl #N]
2991
2992 // Don't fold the add if the result would be slower, unless optimising for
2993 // size.
2994 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
2995    if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
2996      return false;
2997 Shift = AArch64_AM::getShiftValue(Shift);
2998 if (!OptSize) {
2999 if ((Shift != 2 && Shift != 3) || !Subtarget.hasAddrLSLFast())
3000 return false;
3001 if (avoidSlowSTRQ(MemI))
3002 return false;
3003 }
3004 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3005 }
3006
3007 case AArch64::ADDXrr:
3008 // add Xa, Xn, Xm
3009 // ldr Xd, [Xa]
3010 // ->
3011 // ldr Xd, [Xn, Xm, lsl #0]
3012
3013 // Don't fold the add if the result would be slower, unless optimising for
3014 // size.
3015 if (!OptSize && avoidSlowSTRQ(MemI))
3016 return false;
3017 return canFoldAddRegIntoAddrMode(1);
3018
3019 case AArch64::ADDXrx:
3020 // add Xa, Xn, Wm, {s,u}xtw #N
3021 // ldr Xd, [Xa]
3022 // ->
3023 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3024
3025 // Don't fold the add if the result would be slower, unless optimising for
3026 // size.
3027 if (!OptSize && avoidSlowSTRQ(MemI))
3028 return false;
3029
3030 // Can fold only sign-/zero-extend of a word.
3031    unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3032    AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3033    if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3034 return false;
3035
3036 return canFoldAddRegIntoAddrMode(
3037 1ULL << AArch64_AM::getArithShiftValue(Imm),
3040 }
3041}
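// Illustrative end-to-end example (not from the upstream source), with
// hypothetical registers:
//   add x8, x0, #16
//   ldr x9, [x8, #8]
// canFoldIntoAddrMode computes Displacement = 16 + 8 = 24 with BaseReg x0, and
// emitLdStWithAddr below can then rewrite the load as
//   ldur x9, [x0, #24]
// (the unscaled form is chosen because the displacement fits in 9 bits).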
3042
3043// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3044// return the opcode of an instruction performing the same operation, but using
3045// the [Reg, Reg] addressing mode.
3046static unsigned regOffsetOpcode(unsigned Opcode) {
3047 switch (Opcode) {
3048 default:
3049 llvm_unreachable("Address folding not implemented for instruction");
3050
3051 case AArch64::LDURQi:
3052 case AArch64::LDRQui:
3053 return AArch64::LDRQroX;
3054 case AArch64::STURQi:
3055 case AArch64::STRQui:
3056 return AArch64::STRQroX;
3057 case AArch64::LDURDi:
3058 case AArch64::LDRDui:
3059 return AArch64::LDRDroX;
3060 case AArch64::STURDi:
3061 case AArch64::STRDui:
3062 return AArch64::STRDroX;
3063 case AArch64::LDURXi:
3064 case AArch64::LDRXui:
3065 return AArch64::LDRXroX;
3066 case AArch64::STURXi:
3067 case AArch64::STRXui:
3068 return AArch64::STRXroX;
3069 case AArch64::LDURWi:
3070 case AArch64::LDRWui:
3071 return AArch64::LDRWroX;
3072 case AArch64::LDURSWi:
3073 case AArch64::LDRSWui:
3074 return AArch64::LDRSWroX;
3075 case AArch64::STURWi:
3076 case AArch64::STRWui:
3077 return AArch64::STRWroX;
3078 case AArch64::LDURHi:
3079 case AArch64::LDRHui:
3080 return AArch64::LDRHroX;
3081 case AArch64::STURHi:
3082 case AArch64::STRHui:
3083 return AArch64::STRHroX;
3084 case AArch64::LDURHHi:
3085 case AArch64::LDRHHui:
3086 return AArch64::LDRHHroX;
3087 case AArch64::STURHHi:
3088 case AArch64::STRHHui:
3089 return AArch64::STRHHroX;
3090 case AArch64::LDURSHXi:
3091 case AArch64::LDRSHXui:
3092 return AArch64::LDRSHXroX;
3093 case AArch64::LDURSHWi:
3094 case AArch64::LDRSHWui:
3095 return AArch64::LDRSHWroX;
3096 case AArch64::LDURBi:
3097 case AArch64::LDRBui:
3098 return AArch64::LDRBroX;
3099 case AArch64::LDURBBi:
3100 case AArch64::LDRBBui:
3101 return AArch64::LDRBBroX;
3102 case AArch64::LDURSBXi:
3103 case AArch64::LDRSBXui:
3104 return AArch64::LDRSBXroX;
3105 case AArch64::LDURSBWi:
3106 case AArch64::LDRSBWui:
3107 return AArch64::LDRSBWroX;
3108 case AArch64::STURBi:
3109 case AArch64::STRBui:
3110 return AArch64::STRBroX;
3111 case AArch64::STURBBi:
3112 case AArch64::STRBBui:
3113 return AArch64::STRBBroX;
3114 }
3115}
3116
3117// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3118// the opcode of an instruction performing the same operation, but using the
3119// [Reg, #Imm] addressing mode with scaled offset.
3120unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3121 switch (Opcode) {
3122 default:
3123 llvm_unreachable("Address folding not implemented for instruction");
3124
3125 case AArch64::LDURQi:
3126 Scale = 16;
3127 return AArch64::LDRQui;
3128 case AArch64::STURQi:
3129 Scale = 16;
3130 return AArch64::STRQui;
3131 case AArch64::LDURDi:
3132 Scale = 8;
3133 return AArch64::LDRDui;
3134 case AArch64::STURDi:
3135 Scale = 8;
3136 return AArch64::STRDui;
3137 case AArch64::LDURXi:
3138 Scale = 8;
3139 return AArch64::LDRXui;
3140 case AArch64::STURXi:
3141 Scale = 8;
3142 return AArch64::STRXui;
3143 case AArch64::LDURWi:
3144 Scale = 4;
3145 return AArch64::LDRWui;
3146 case AArch64::LDURSWi:
3147 Scale = 4;
3148 return AArch64::LDRSWui;
3149 case AArch64::STURWi:
3150 Scale = 4;
3151 return AArch64::STRWui;
3152 case AArch64::LDURHi:
3153 Scale = 2;
3154 return AArch64::LDRHui;
3155 case AArch64::STURHi:
3156 Scale = 2;
3157 return AArch64::STRHui;
3158 case AArch64::LDURHHi:
3159 Scale = 2;
3160 return AArch64::LDRHHui;
3161 case AArch64::STURHHi:
3162 Scale = 2;
3163 return AArch64::STRHHui;
3164 case AArch64::LDURSHXi:
3165 Scale = 2;
3166 return AArch64::LDRSHXui;
3167 case AArch64::LDURSHWi:
3168 Scale = 2;
3169 return AArch64::LDRSHWui;
3170 case AArch64::LDURBi:
3171 Scale = 1;
3172 return AArch64::LDRBui;
3173 case AArch64::LDURBBi:
3174 Scale = 1;
3175 return AArch64::LDRBBui;
3176 case AArch64::LDURSBXi:
3177 Scale = 1;
3178 return AArch64::LDRSBXui;
3179 case AArch64::LDURSBWi:
3180 Scale = 1;
3181 return AArch64::LDRSBWui;
3182 case AArch64::STURBi:
3183 Scale = 1;
3184 return AArch64::STRBui;
3185 case AArch64::STURBBi:
3186 Scale = 1;
3187 return AArch64::STRBBui;
3188 case AArch64::LDRQui:
3189 case AArch64::STRQui:
3190 Scale = 16;
3191 return Opcode;
3192 case AArch64::LDRDui:
3193 case AArch64::STRDui:
3194 case AArch64::LDRXui:
3195 case AArch64::STRXui:
3196 Scale = 8;
3197 return Opcode;
3198 case AArch64::LDRWui:
3199 case AArch64::LDRSWui:
3200 case AArch64::STRWui:
3201 Scale = 4;
3202 return Opcode;
3203 case AArch64::LDRHui:
3204 case AArch64::STRHui:
3205 case AArch64::LDRHHui:
3206 case AArch64::STRHHui:
3207 case AArch64::LDRSHXui:
3208 case AArch64::LDRSHWui:
3209 Scale = 2;
3210 return Opcode;
3211 case AArch64::LDRBui:
3212 case AArch64::LDRBBui:
3213 case AArch64::LDRSBXui:
3214 case AArch64::LDRSBWui:
3215 case AArch64::STRBui:
3216 case AArch64::STRBBui:
3217 Scale = 1;
3218 return Opcode;
3219 }
3220}
3221
3222// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3223// the opcode of an instruction performing the same operation, but using the
3224// [Reg, #Imm] addressing mode with unscaled offset.
3225unsigned unscaledOffsetOpcode(unsigned Opcode) {
3226 switch (Opcode) {
3227 default:
3228 llvm_unreachable("Address folding not implemented for instruction");
3229
3230 case AArch64::LDURQi:
3231 case AArch64::STURQi:
3232 case AArch64::LDURDi:
3233 case AArch64::STURDi:
3234 case AArch64::LDURXi:
3235 case AArch64::STURXi:
3236 case AArch64::LDURWi:
3237 case AArch64::LDURSWi:
3238 case AArch64::STURWi:
3239 case AArch64::LDURHi:
3240 case AArch64::STURHi:
3241 case AArch64::LDURHHi:
3242 case AArch64::STURHHi:
3243 case AArch64::LDURSHXi:
3244 case AArch64::LDURSHWi:
3245 case AArch64::LDURBi:
3246 case AArch64::STURBi:
3247 case AArch64::LDURBBi:
3248 case AArch64::STURBBi:
3249 case AArch64::LDURSBWi:
3250 case AArch64::LDURSBXi:
3251 return Opcode;
3252 case AArch64::LDRQui:
3253 return AArch64::LDURQi;
3254 case AArch64::STRQui:
3255 return AArch64::STURQi;
3256 case AArch64::LDRDui:
3257 return AArch64::LDURDi;
3258 case AArch64::STRDui:
3259 return AArch64::STURDi;
3260 case AArch64::LDRXui:
3261 return AArch64::LDURXi;
3262 case AArch64::STRXui:
3263 return AArch64::STURXi;
3264 case AArch64::LDRWui:
3265 return AArch64::LDURWi;
3266 case AArch64::LDRSWui:
3267 return AArch64::LDURSWi;
3268 case AArch64::STRWui:
3269 return AArch64::STURWi;
3270 case AArch64::LDRHui:
3271 return AArch64::LDURHi;
3272 case AArch64::STRHui:
3273 return AArch64::STURHi;
3274 case AArch64::LDRHHui:
3275 return AArch64::LDURHHi;
3276 case AArch64::STRHHui:
3277 return AArch64::STURHHi;
3278 case AArch64::LDRSHXui:
3279 return AArch64::LDURSHXi;
3280 case AArch64::LDRSHWui:
3281 return AArch64::LDURSHWi;
3282 case AArch64::LDRBBui:
3283 return AArch64::LDURBBi;
3284 case AArch64::LDRBui:
3285 return AArch64::LDURBi;
3286 case AArch64::STRBBui:
3287 return AArch64::STURBBi;
3288 case AArch64::STRBui:
3289 return AArch64::STURBi;
3290 case AArch64::LDRSBWui:
3291 return AArch64::LDURSBWi;
3292 case AArch64::LDRSBXui:
3293 return AArch64::LDURSBXi;
3294 }
3295}
3296
3297// Given the opcode of a memory load/store instruction, return the opcode of an
3298// instruction performing the same operation, but using
3299// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3300// offset register.
3301static unsigned offsetExtendOpcode(unsigned Opcode) {
3302 switch (Opcode) {
3303 default:
3304 llvm_unreachable("Address folding not implemented for instruction");
3305
3306 case AArch64::LDRQroX:
3307 case AArch64::LDURQi:
3308 case AArch64::LDRQui:
3309 return AArch64::LDRQroW;
3310 case AArch64::STRQroX:
3311 case AArch64::STURQi:
3312 case AArch64::STRQui:
3313 return AArch64::STRQroW;
3314 case AArch64::LDRDroX:
3315 case AArch64::LDURDi:
3316 case AArch64::LDRDui:
3317 return AArch64::LDRDroW;
3318 case AArch64::STRDroX:
3319 case AArch64::STURDi:
3320 case AArch64::STRDui:
3321 return AArch64::STRDroW;
3322 case AArch64::LDRXroX:
3323 case AArch64::LDURXi:
3324 case AArch64::LDRXui:
3325 return AArch64::LDRXroW;
3326 case AArch64::STRXroX:
3327 case AArch64::STURXi:
3328 case AArch64::STRXui:
3329 return AArch64::STRXroW;
3330 case AArch64::LDRWroX:
3331 case AArch64::LDURWi:
3332 case AArch64::LDRWui:
3333 return AArch64::LDRWroW;
3334 case AArch64::LDRSWroX:
3335 case AArch64::LDURSWi:
3336 case AArch64::LDRSWui:
3337 return AArch64::LDRSWroW;
3338 case AArch64::STRWroX:
3339 case AArch64::STURWi:
3340 case AArch64::STRWui:
3341 return AArch64::STRWroW;
3342 case AArch64::LDRHroX:
3343 case AArch64::LDURHi:
3344 case AArch64::LDRHui:
3345 return AArch64::LDRHroW;
3346 case AArch64::STRHroX:
3347 case AArch64::STURHi:
3348 case AArch64::STRHui:
3349 return AArch64::STRHroW;
3350 case AArch64::LDRHHroX:
3351 case AArch64::LDURHHi:
3352 case AArch64::LDRHHui:
3353 return AArch64::LDRHHroW;
3354 case AArch64::STRHHroX:
3355 case AArch64::STURHHi:
3356 case AArch64::STRHHui:
3357 return AArch64::STRHHroW;
3358 case AArch64::LDRSHXroX:
3359 case AArch64::LDURSHXi:
3360 case AArch64::LDRSHXui:
3361 return AArch64::LDRSHXroW;
3362 case AArch64::LDRSHWroX:
3363 case AArch64::LDURSHWi:
3364 case AArch64::LDRSHWui:
3365 return AArch64::LDRSHWroW;
3366 case AArch64::LDRBroX:
3367 case AArch64::LDURBi:
3368 case AArch64::LDRBui:
3369 return AArch64::LDRBroW;
3370 case AArch64::LDRBBroX:
3371 case AArch64::LDURBBi:
3372 case AArch64::LDRBBui:
3373 return AArch64::LDRBBroW;
3374 case AArch64::LDRSBXroX:
3375 case AArch64::LDURSBXi:
3376 case AArch64::LDRSBXui:
3377 return AArch64::LDRSBXroW;
3378 case AArch64::LDRSBWroX:
3379 case AArch64::LDURSBWi:
3380 case AArch64::LDRSBWui:
3381 return AArch64::LDRSBWroW;
3382 case AArch64::STRBroX:
3383 case AArch64::STURBi:
3384 case AArch64::STRBui:
3385 return AArch64::STRBroW;
3386 case AArch64::STRBBroX:
3387 case AArch64::STURBBi:
3388 case AArch64::STRBBui:
3389 return AArch64::STRBBroW;
3390 }
3391}
3392
3393MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3394                                                 const ExtAddrMode &AM) const {
3395
3396 const DebugLoc &DL = MemI.getDebugLoc();
3397 MachineBasicBlock &MBB = *MemI.getParent();
3399
3401 if (AM.ScaledReg) {
3402 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3403 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3404 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3405 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3406 .addReg(MemI.getOperand(0).getReg(),
3407 MemI.mayLoad() ? RegState::Define : 0)
3408 .addReg(AM.BaseReg)
3409 .addReg(AM.ScaledReg)
3410 .addImm(0)
3411 .addImm(AM.Scale > 1)
3412 .setMemRefs(MemI.memoperands())
3413 .setMIFlags(MemI.getFlags());
3414 return B.getInstr();
3415 }
3416
3417 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3418 "Addressing mode not supported for folding");
3419
3420 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3421 unsigned Scale = 1;
3422 unsigned Opcode = MemI.getOpcode();
3423 if (isInt<9>(AM.Displacement))
3424 Opcode = unscaledOffsetOpcode(Opcode);
3425 else
3426 Opcode = scaledOffsetOpcode(Opcode, Scale);
3427
3428 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3429 .addReg(MemI.getOperand(0).getReg(),
3430 MemI.mayLoad() ? RegState::Define : 0)
3431 .addReg(AM.BaseReg)
3432 .addImm(AM.Displacement / Scale)
3433 .setMemRefs(MemI.memoperands())
3434 .setMIFlags(MemI.getFlags());
3435 return B.getInstr();
3436 }
3437
3440 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3441 assert(AM.ScaledReg && !AM.Displacement &&
3442 "Address offset can be a register or an immediate, but not both");
3443 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3444 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3445 // Make sure the offset register is in the correct register class.
3446 Register OffsetReg = AM.ScaledReg;
3447 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3448 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3449 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3450 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3451 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3452 }
3453 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3454 .addReg(MemI.getOperand(0).getReg(),
3455 MemI.mayLoad() ? RegState::Define : 0)
3456 .addReg(AM.BaseReg)
3457 .addReg(OffsetReg)
3459 .addImm(AM.Scale != 1)
3460 .setMemRefs(MemI.memoperands())
3461 .setMIFlags(MemI.getFlags());
3462
3463 return B.getInstr();
3464 }
3465
3466  llvm_unreachable(
3467      "Function must not be called with an addressing mode it can't handle");
3468}
3469
3470bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3471    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3472 bool &OffsetIsScalable, TypeSize &Width,
3473 const TargetRegisterInfo *TRI) const {
3474 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3475 // Handle only loads/stores with base register followed by immediate offset.
3476 if (LdSt.getNumExplicitOperands() == 3) {
3477 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3478 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3479 !LdSt.getOperand(2).isImm())
3480 return false;
3481 } else if (LdSt.getNumExplicitOperands() == 4) {
3482 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3483 if (!LdSt.getOperand(1).isReg() ||
3484 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3485 !LdSt.getOperand(3).isImm())
3486 return false;
3487 } else
3488 return false;
3489
3490 // Get the scaling factor for the instruction and set the width for the
3491 // instruction.
3492 TypeSize Scale(0U, false);
3493 int64_t Dummy1, Dummy2;
3494
3495 // If this returns false, then it's an instruction we don't want to handle.
3496 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3497 return false;
3498
3499 // Compute the offset. Offset is calculated as the immediate operand
3500 // multiplied by the scaling factor. Unscaled instructions have scaling factor
3501 // set to 1.
3502 if (LdSt.getNumExplicitOperands() == 3) {
3503 BaseOp = &LdSt.getOperand(1);
3504 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3505 } else {
3506 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3507 BaseOp = &LdSt.getOperand(2);
3508 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3509 }
3510 OffsetIsScalable = Scale.isScalable();
3511
3512 if (!BaseOp->isReg() && !BaseOp->isFI())
3513 return false;
3514
3515 return true;
3516}
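// Illustrative example (not from the upstream source): for "ldr x1, [x0, #16]"
// (LDRXui with immediate operand 2), the function above sets BaseOp to the x0
// operand, Offset = 2 * 8 = 16, Width = 8 bytes and OffsetIsScalable = false.
// For an SVE fill/spill such as LDR_ZXI the scale is scalable, so
// OffsetIsScalable is set to true.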
3517
3520 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3521 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3522 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3523 return OfsOp;
3524}
3525
3526bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3527 TypeSize &Width, int64_t &MinOffset,
3528 int64_t &MaxOffset) {
3529 switch (Opcode) {
3530 // Not a memory operation or something we want to handle.
3531 default:
3532 Scale = TypeSize::getFixed(0);
3533 Width = TypeSize::getFixed(0);
3534 MinOffset = MaxOffset = 0;
3535 return false;
3536 case AArch64::STRWpost:
3537 case AArch64::LDRWpost:
3538 Width = TypeSize::getFixed(32);
3539 Scale = TypeSize::getFixed(4);
3540 MinOffset = -256;
3541 MaxOffset = 255;
3542 break;
3543 case AArch64::LDURQi:
3544 case AArch64::STURQi:
3545 Width = TypeSize::getFixed(16);
3546 Scale = TypeSize::getFixed(1);
3547 MinOffset = -256;
3548 MaxOffset = 255;
3549 break;
3550 case AArch64::PRFUMi:
3551 case AArch64::LDURXi:
3552 case AArch64::LDURDi:
3553 case AArch64::LDAPURXi:
3554 case AArch64::STURXi:
3555 case AArch64::STURDi:
3556 case AArch64::STLURXi:
3557 Width = TypeSize::getFixed(8);
3558 Scale = TypeSize::getFixed(1);
3559 MinOffset = -256;
3560 MaxOffset = 255;
3561 break;
3562 case AArch64::LDURWi:
3563 case AArch64::LDURSi:
3564 case AArch64::LDURSWi:
3565 case AArch64::LDAPURi:
3566 case AArch64::LDAPURSWi:
3567 case AArch64::STURWi:
3568 case AArch64::STURSi:
3569 case AArch64::STLURWi:
3570 Width = TypeSize::getFixed(4);
3571 Scale = TypeSize::getFixed(1);
3572 MinOffset = -256;
3573 MaxOffset = 255;
3574 break;
3575 case AArch64::LDURHi:
3576 case AArch64::LDURHHi:
3577 case AArch64::LDURSHXi:
3578 case AArch64::LDURSHWi:
3579 case AArch64::LDAPURHi:
3580 case AArch64::LDAPURSHWi:
3581 case AArch64::LDAPURSHXi:
3582 case AArch64::STURHi:
3583 case AArch64::STURHHi:
3584 case AArch64::STLURHi:
3585 Width = TypeSize::getFixed(2);
3586 Scale = TypeSize::getFixed(1);
3587 MinOffset = -256;
3588 MaxOffset = 255;
3589 break;
3590 case AArch64::LDURBi:
3591 case AArch64::LDURBBi:
3592 case AArch64::LDURSBXi:
3593 case AArch64::LDURSBWi:
3594 case AArch64::LDAPURBi:
3595 case AArch64::LDAPURSBWi:
3596 case AArch64::LDAPURSBXi:
3597 case AArch64::STURBi:
3598 case AArch64::STURBBi:
3599 case AArch64::STLURBi:
3600 Width = TypeSize::getFixed(1);
3601 Scale = TypeSize::getFixed(1);
3602 MinOffset = -256;
3603 MaxOffset = 255;
3604 break;
3605 case AArch64::LDPQi:
3606 case AArch64::LDNPQi:
3607 case AArch64::STPQi:
3608 case AArch64::STNPQi:
3609 Scale = TypeSize::getFixed(16);
3610 Width = TypeSize::getFixed(32);
3611 MinOffset = -64;
3612 MaxOffset = 63;
3613 break;
3614 case AArch64::LDRQui:
3615 case AArch64::STRQui:
3616 Scale = TypeSize::getFixed(16);
3617 Width = TypeSize::getFixed(16);
3618 MinOffset = 0;
3619 MaxOffset = 4095;
3620 break;
3621 case AArch64::LDPXi:
3622 case AArch64::LDPDi:
3623 case AArch64::LDNPXi:
3624 case AArch64::LDNPDi:
3625 case AArch64::STPXi:
3626 case AArch64::STPDi:
3627 case AArch64::STNPXi:
3628 case AArch64::STNPDi:
3629 Scale = TypeSize::getFixed(8);
3630 Width = TypeSize::getFixed(16);
3631 MinOffset = -64;
3632 MaxOffset = 63;
3633 break;
3634 case AArch64::PRFMui:
3635 case AArch64::LDRXui:
3636 case AArch64::LDRDui:
3637 case AArch64::STRXui:
3638 case AArch64::STRDui:
3639 Scale = TypeSize::getFixed(8);
3640 Width = TypeSize::getFixed(8);
3641 MinOffset = 0;
3642 MaxOffset = 4095;
3643 break;
3644 case AArch64::StoreSwiftAsyncContext:
3645 // Store is an STRXui, but there might be an ADDXri in the expansion too.
3646 Scale = TypeSize::getFixed(1);
3647 Width = TypeSize::getFixed(8);
3648 MinOffset = 0;
3649 MaxOffset = 4095;
3650 break;
3651 case AArch64::LDPWi:
3652 case AArch64::LDPSi:
3653 case AArch64::LDNPWi:
3654 case AArch64::LDNPSi:
3655 case AArch64::STPWi:
3656 case AArch64::STPSi:
3657 case AArch64::STNPWi:
3658 case AArch64::STNPSi:
3659 Scale = TypeSize::getFixed(4);
3660 Width = TypeSize::getFixed(8);
3661 MinOffset = -64;
3662 MaxOffset = 63;
3663 break;
3664 case AArch64::LDRWui:
3665 case AArch64::LDRSui:
3666 case AArch64::LDRSWui:
3667 case AArch64::STRWui:
3668 case AArch64::STRSui:
3669 Scale = TypeSize::getFixed(4);
3670 Width = TypeSize::getFixed(4);
3671 MinOffset = 0;
3672 MaxOffset = 4095;
3673 break;
3674 case AArch64::LDRHui:
3675 case AArch64::LDRHHui:
3676 case AArch64::LDRSHWui:
3677 case AArch64::LDRSHXui:
3678 case AArch64::STRHui:
3679 case AArch64::STRHHui:
3680 Scale = TypeSize::getFixed(2);
3681 Width = TypeSize::getFixed(2);
3682 MinOffset = 0;
3683 MaxOffset = 4095;
3684 break;
3685 case AArch64::LDRBui:
3686 case AArch64::LDRBBui:
3687 case AArch64::LDRSBWui:
3688 case AArch64::LDRSBXui:
3689 case AArch64::STRBui:
3690 case AArch64::STRBBui:
3691 Scale = TypeSize::getFixed(1);
3692 Width = TypeSize::getFixed(1);
3693 MinOffset = 0;
3694 MaxOffset = 4095;
3695 break;
3696 case AArch64::STPXpre:
3697 case AArch64::LDPXpost:
3698 case AArch64::STPDpre:
3699 case AArch64::LDPDpost:
3700 Scale = TypeSize::getFixed(8);
3701 Width = TypeSize::getFixed(8);
3702 MinOffset = -512;
3703 MaxOffset = 504;
3704 break;
3705 case AArch64::STPQpre:
3706 case AArch64::LDPQpost:
3707 Scale = TypeSize::getFixed(16);
3708 Width = TypeSize::getFixed(16);
3709 MinOffset = -1024;
3710 MaxOffset = 1008;
3711 break;
3712 case AArch64::STRXpre:
3713 case AArch64::STRDpre:
3714 case AArch64::LDRXpost:
3715 case AArch64::LDRDpost:
3716 Scale = TypeSize::getFixed(1);
3717 Width = TypeSize::getFixed(8);
3718 MinOffset = -256;
3719 MaxOffset = 255;
3720 break;
3721 case AArch64::STRQpre:
3722 case AArch64::LDRQpost:
3723 Scale = TypeSize::getFixed(1);
3724 Width = TypeSize::getFixed(16);
3725 MinOffset = -256;
3726 MaxOffset = 255;
3727 break;
3728 case AArch64::ADDG:
3729 Scale = TypeSize::getFixed(16);
3730 Width = TypeSize::getFixed(0);
3731 MinOffset = 0;
3732 MaxOffset = 63;
3733 break;
3734 case AArch64::TAGPstack:
3735 Scale = TypeSize::getFixed(16);
3736 Width = TypeSize::getFixed(0);
3737 // TAGP with a negative offset turns into SUBP, which has a maximum offset
3738 // of 63 (not 64!).
3739 MinOffset = -63;
3740 MaxOffset = 63;
3741 break;
3742 case AArch64::LDG:
3743 case AArch64::STGi:
3744 case AArch64::STZGi:
3745 Scale = TypeSize::getFixed(16);
3746 Width = TypeSize::getFixed(16);
3747 MinOffset = -256;
3748 MaxOffset = 255;
3749 break;
3750 case AArch64::STR_ZZZZXI:
3751 case AArch64::LDR_ZZZZXI:
3752 Scale = TypeSize::getScalable(16);
3753 Width = TypeSize::getScalable(16 * 4);
3754 MinOffset = -256;
3755 MaxOffset = 252;
3756 break;
3757 case AArch64::STR_ZZZXI:
3758 case AArch64::LDR_ZZZXI:
3759 Scale = TypeSize::getScalable(16);
3760 Width = TypeSize::getScalable(16 * 3);
3761 MinOffset = -256;
3762 MaxOffset = 253;
3763 break;
3764 case AArch64::STR_ZZXI:
3765 case AArch64::LDR_ZZXI:
3766 Scale = TypeSize::getScalable(16);
3767 Width = TypeSize::getScalable(16 * 2);
3768 MinOffset = -256;
3769 MaxOffset = 254;
3770 break;
3771 case AArch64::LDR_PXI:
3772 case AArch64::STR_PXI:
3773 Scale = TypeSize::getScalable(2);
3774 Width = TypeSize::getScalable(2);
3775 MinOffset = -256;
3776 MaxOffset = 255;
3777 break;
3778 case AArch64::LDR_PPXI:
3779 case AArch64::STR_PPXI:
3780 Scale = TypeSize::getScalable(2);
3781 Width = TypeSize::getScalable(2 * 2);
3782 MinOffset = -256;
3783 MaxOffset = 254;
3784 break;
3785 case AArch64::LDR_ZXI:
3786 case AArch64::STR_ZXI:
3787 Scale = TypeSize::getScalable(16);
3788 Width = TypeSize::getScalable(16);
3789 MinOffset = -256;
3790 MaxOffset = 255;
3791 break;
3792 case AArch64::LD1B_IMM:
3793 case AArch64::LD1H_IMM:
3794 case AArch64::LD1W_IMM:
3795 case AArch64::LD1D_IMM:
3796 case AArch64::LDNT1B_ZRI:
3797 case AArch64::LDNT1H_ZRI:
3798 case AArch64::LDNT1W_ZRI:
3799 case AArch64::LDNT1D_ZRI:
3800 case AArch64::ST1B_IMM:
3801 case AArch64::ST1H_IMM:
3802 case AArch64::ST1W_IMM:
3803 case AArch64::ST1D_IMM:
3804 case AArch64::STNT1B_ZRI:
3805 case AArch64::STNT1H_ZRI:
3806 case AArch64::STNT1W_ZRI:
3807 case AArch64::STNT1D_ZRI:
3808 case AArch64::LDNF1B_IMM:
3809 case AArch64::LDNF1H_IMM:
3810 case AArch64::LDNF1W_IMM:
3811 case AArch64::LDNF1D_IMM:
3812 // A full vector's worth of data
3813 // Width = mbytes * elements
3814 Scale = TypeSize::getScalable(16);
3815 Width = TypeSize::getScalable(16);
3816 MinOffset = -8;
3817 MaxOffset = 7;
3818 break;
3819 case AArch64::LD2B_IMM:
3820 case AArch64::LD2H_IMM:
3821 case AArch64::LD2W_IMM:
3822 case AArch64::LD2D_IMM:
3823 case AArch64::ST2B_IMM:
3824 case AArch64::ST2H_IMM:
3825 case AArch64::ST2W_IMM:
3826 case AArch64::ST2D_IMM:
3827 Scale = TypeSize::getScalable(32);
3828 Width = TypeSize::getScalable(16 * 2);
3829 MinOffset = -8;
3830 MaxOffset = 7;
3831 break;
3832 case AArch64::LD3B_IMM:
3833 case AArch64::LD3H_IMM:
3834 case AArch64::LD3W_IMM:
3835 case AArch64::LD3D_IMM:
3836 case AArch64::ST3B_IMM:
3837 case AArch64::ST3H_IMM:
3838 case AArch64::ST3W_IMM:
3839 case AArch64::ST3D_IMM:
3840 Scale = TypeSize::getScalable(48);
3841 Width = TypeSize::getScalable(16 * 3);
3842 MinOffset = -8;
3843 MaxOffset = 7;
3844 break;
3845 case AArch64::LD4B_IMM:
3846 case AArch64::LD4H_IMM:
3847 case AArch64::LD4W_IMM:
3848 case AArch64::LD4D_IMM:
3849 case AArch64::ST4B_IMM:
3850 case AArch64::ST4H_IMM:
3851 case AArch64::ST4W_IMM:
3852 case AArch64::ST4D_IMM:
3853 Scale = TypeSize::getScalable(64);
3854 Width = TypeSize::getScalable(16 * 4);
3855 MinOffset = -8;
3856 MaxOffset = 7;
3857 break;
3858 case AArch64::LD1B_H_IMM:
3859 case AArch64::LD1SB_H_IMM:
3860 case AArch64::LD1H_S_IMM:
3861 case AArch64::LD1SH_S_IMM:
3862 case AArch64::LD1W_D_IMM:
3863 case AArch64::LD1SW_D_IMM:
3864 case AArch64::ST1B_H_IMM:
3865 case AArch64::ST1H_S_IMM:
3866 case AArch64::ST1W_D_IMM:
3867 case AArch64::LDNF1B_H_IMM:
3868 case AArch64::LDNF1SB_H_IMM:
3869 case AArch64::LDNF1H_S_IMM:
3870 case AArch64::LDNF1SH_S_IMM:
3871 case AArch64::LDNF1W_D_IMM:
3872 case AArch64::LDNF1SW_D_IMM:
3873 // A half vector's worth of data
3874 // Width = mbytes * elements
3875 Scale = TypeSize::getScalable(8);
3876 Width = TypeSize::getScalable(8);
3877 MinOffset = -8;
3878 MaxOffset = 7;
3879 break;
3880 case AArch64::LD1B_S_IMM:
3881 case AArch64::LD1SB_S_IMM:
3882 case AArch64::LD1H_D_IMM:
3883 case AArch64::LD1SH_D_IMM:
3884 case AArch64::ST1B_S_IMM:
3885 case AArch64::ST1H_D_IMM:
3886 case AArch64::LDNF1B_S_IMM:
3887 case AArch64::LDNF1SB_S_IMM:
3888 case AArch64::LDNF1H_D_IMM:
3889 case AArch64::LDNF1SH_D_IMM:
3890 // A quarter vector's worth of data
3891 // Width = mbytes * elements
3892 Scale = TypeSize::getScalable(4);
3893 Width = TypeSize::getScalable(4);
3894 MinOffset = -8;
3895 MaxOffset = 7;
3896 break;
3897 case AArch64::LD1B_D_IMM:
3898 case AArch64::LD1SB_D_IMM:
3899 case AArch64::ST1B_D_IMM:
3900 case AArch64::LDNF1B_D_IMM:
3901 case AArch64::LDNF1SB_D_IMM:
3902 // An eighth vector's worth of data
3903 // Width = mbytes * elements
3904 Scale = TypeSize::getScalable(2);
3905 Width = TypeSize::getScalable(2);
3906 MinOffset = -8;
3907 MaxOffset = 7;
3908 break;
3909 case AArch64::ST2Gi:
3910 case AArch64::STZ2Gi:
3911 Scale = TypeSize::getFixed(16);
3912 Width = TypeSize::getFixed(32);
3913 MinOffset = -256;
3914 MaxOffset = 255;
3915 break;
3916 case AArch64::STGPi:
3917 Scale = TypeSize::getFixed(16);
3918 Width = TypeSize::getFixed(16);
3919 MinOffset = -64;
3920 MaxOffset = 63;
3921 break;
3922 case AArch64::LD1RB_IMM:
3923 case AArch64::LD1RB_H_IMM:
3924 case AArch64::LD1RB_S_IMM:
3925 case AArch64::LD1RB_D_IMM:
3926 case AArch64::LD1RSB_H_IMM:
3927 case AArch64::LD1RSB_S_IMM:
3928 case AArch64::LD1RSB_D_IMM:
3929 Scale = TypeSize::getFixed(1);
3930 Width = TypeSize::getFixed(1);
3931 MinOffset = 0;
3932 MaxOffset = 63;
3933 break;
3934 case AArch64::LD1RH_IMM:
3935 case AArch64::LD1RH_S_IMM:
3936 case AArch64::LD1RH_D_IMM:
3937 case AArch64::LD1RSH_S_IMM:
3938 case AArch64::LD1RSH_D_IMM:
3939 Scale = TypeSize::getFixed(2);
3940 Width = TypeSize::getFixed(2);
3941 MinOffset = 0;
3942 MaxOffset = 63;
3943 break;
3944 case AArch64::LD1RW_IMM:
3945 case AArch64::LD1RW_D_IMM:
3946 case AArch64::LD1RSW_IMM:
3947 Scale = TypeSize::getFixed(4);
3948 Width = TypeSize::getFixed(4);
3949 MinOffset = 0;
3950 MaxOffset = 63;
3951 break;
3952 case AArch64::LD1RD_IMM:
3953 Scale = TypeSize::getFixed(8);
3954 Width = TypeSize::getFixed(8);
3955 MinOffset = 0;
3956 MaxOffset = 63;
3957 break;
3958 }
3959
3960 return true;
3961}
3962
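// Illustrative sketch (added example, not part of the upstream file): assuming
// the switch above forms the body of AArch64InstrInfo::getMemOpInfo, a caller
// can combine the reported Scale, MinOffset and MaxOffset to test whether a
// byte offset is directly encodable in a given load/store opcode. The helper
// name is invented for the example and only the known-minimum scale is used,
// so scalable opcodes are treated per vector granule.
static bool isEncodableImmOffset(unsigned Opc, int64_t ByteOffset) {
  TypeSize Scale = TypeSize::getFixed(0), Width = TypeSize::getFixed(0);
  int64_t MinOffset = 0, MaxOffset = 0;
  if (!AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset))
    return false;
  // The immediate field encodes multiples of Scale in [MinOffset, MaxOffset],
  // so the byte offset must divide evenly and the quotient must be in range.
  int64_t ScaleVal = Scale.getKnownMinValue();
  return ByteOffset % ScaleVal == 0 && ByteOffset / ScaleVal >= MinOffset &&
         ByteOffset / ScaleVal <= MaxOffset;
}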
3963// Scaling factor for unscaled load or store.
3964int AArch64InstrInfo::getMemScale(unsigned Opc) {
3965 switch (Opc) {
3966 default:
3967 llvm_unreachable("Opcode has unknown scale!");
3968 case AArch64::LDRBBui:
3969 case AArch64::LDURBBi:
3970 case AArch64::LDRSBWui:
3971 case AArch64::LDURSBWi:
3972 case AArch64::STRBBui:
3973 case AArch64::STURBBi:
3974 return 1;
3975 case AArch64::LDRHHui:
3976 case AArch64::LDURHHi:
3977 case AArch64::LDRSHWui:
3978 case AArch64::LDURSHWi:
3979 case AArch64::STRHHui:
3980 case AArch64::STURHHi:
3981 return 2;
3982 case AArch64::LDRSui:
3983 case AArch64::LDURSi:
3984 case AArch64::LDRSpre:
3985 case AArch64::LDRSWui:
3986 case AArch64::LDURSWi:
3987 case AArch64::LDRSWpre:
3988 case AArch64::LDRWpre:
3989 case AArch64::LDRWui:
3990 case AArch64::LDURWi:
3991 case AArch64::STRSui:
3992 case AArch64::STURSi:
3993 case AArch64::STRSpre:
3994 case AArch64::STRWui:
3995 case AArch64::STURWi:
3996 case AArch64::STRWpre:
3997 case AArch64::LDPSi:
3998 case AArch64::LDPSWi:
3999 case AArch64::LDPWi:
4000 case AArch64::STPSi:
4001 case AArch64::STPWi:
4002 return 4;
4003 case AArch64::LDRDui:
4004 case AArch64::LDURDi:
4005 case AArch64::LDRDpre:
4006 case AArch64::LDRXui:
4007 case AArch64::LDURXi:
4008 case AArch64::LDRXpre:
4009 case AArch64::STRDui:
4010 case AArch64::STURDi:
4011 case AArch64::STRDpre:
4012 case AArch64::STRXui:
4013 case AArch64::STURXi:
4014 case AArch64::STRXpre:
4015 case AArch64::LDPDi:
4016 case AArch64::LDPXi:
4017 case AArch64::STPDi:
4018 case AArch64::STPXi:
4019 return 8;
4020 case AArch64::LDRQui:
4021 case AArch64::LDURQi:
4022 case AArch64::STRQui:
4023 case AArch64::STURQi:
4024 case AArch64::STRQpre:
4025 case AArch64::LDPQi:
4026 case AArch64::LDRQpre:
4027 case AArch64::STPQi:
4028 case AArch64::STGi:
4029 case AArch64::STZGi:
4030 case AArch64::ST2Gi:
4031 case AArch64::STZ2Gi:
4032 case AArch64::STGPi:
4033 return 16;
4034 }
4035}
4036
4038 switch (MI.getOpcode()) {
4039 default:
4040 return false;
4041 case AArch64::LDRWpre:
4042 case AArch64::LDRXpre:
4043 case AArch64::LDRSWpre:
4044 case AArch64::LDRSpre:
4045 case AArch64::LDRDpre:
4046 case AArch64::LDRQpre:
4047 return true;
4048 }
4049}
4050
4052 switch (MI.getOpcode()) {
4053 default:
4054 return false;
4055 case AArch64::STRWpre:
4056 case AArch64::STRXpre:
4057 case AArch64::STRSpre:
4058 case AArch64::STRDpre:
4059 case AArch64::STRQpre:
4060 return true;
4061 }
4062}
4063
4065 return isPreLd(MI) || isPreSt(MI);
4066}
4067
4069 switch (MI.getOpcode()) {
4070 default:
4071 return false;
4072 case AArch64::LDPSi:
4073 case AArch64::LDPSWi:
4074 case AArch64::LDPDi:
4075 case AArch64::LDPQi:
4076 case AArch64::LDPWi:
4077 case AArch64::LDPXi:
4078 case AArch64::STPSi:
4079 case AArch64::STPDi:
4080 case AArch64::STPQi:
4081 case AArch64::STPWi:
4082 case AArch64::STPXi:
4083 case AArch64::STGPi:
4084 return true;
4085 }
4086}
4087
4089 unsigned Idx =
4091 : 1;
4092 return MI.getOperand(Idx);
4093}
4094
4095const MachineOperand &
4097 unsigned Idx =
4099 : 2;
4100 return MI.getOperand(Idx);
4101}
4102
4104 Register Reg) {
4105 if (MI.getParent() == nullptr)
4106 return nullptr;
4107 const MachineFunction *MF = MI.getParent()->getParent();
4108 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4109}
4110
4112 auto IsHFPR = [&](const MachineOperand &Op) {
4113 if (!Op.isReg())
4114 return false;
4115 auto Reg = Op.getReg();
4116 if (Reg.isPhysical())
4117 return AArch64::FPR16RegClass.contains(Reg);
4118 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4119 return TRC == &AArch64::FPR16RegClass ||
4120 TRC == &AArch64::FPR16_loRegClass;
4121 };
4122 return llvm::any_of(MI.operands(), IsHFPR);
4123}
4124
4126 auto IsQFPR = [&](const MachineOperand &Op) {
4127 if (!Op.isReg())
4128 return false;
4129 auto Reg = Op.getReg();
4130 if (Reg.isPhysical())
4131 return AArch64::FPR128RegClass.contains(Reg);
4132 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4133 return TRC == &AArch64::FPR128RegClass ||
4134 TRC == &AArch64::FPR128_loRegClass;
4135 };
4136 return llvm::any_of(MI.operands(), IsQFPR);
4137}
4138
4140 switch (MI.getOpcode()) {
4141 case AArch64::BRK:
4142 case AArch64::HLT:
4143 case AArch64::PACIASP:
4144 case AArch64::PACIBSP:
4145 // Implicit BTI behavior.
4146 return true;
4147 case AArch64::PAUTH_PROLOGUE:
4148 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4149 return true;
4150 case AArch64::HINT: {
4151 unsigned Imm = MI.getOperand(0).getImm();
4152 // Explicit BTI instruction.
4153 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4154 return true;
4155 // PACI(A|B)SP instructions.
4156 if (Imm == 25 || Imm == 27)
4157 return true;
4158 return false;
4159 }
4160 default:
4161 return false;
4162 }
4163}
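// Added note: the HINT immediates accepted above are the hint-space encodings
// of BTI (#32), BTI c (#34), BTI j (#36) and BTI jc (#38), plus PACIASP (#25)
// and PACIBSP (#27), which mirror the explicitly listed PACIASP/PACIBSP cases.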
4164
4166 auto IsFPR = [&](const MachineOperand &Op) {
4167 if (!Op.isReg())
4168 return false;
4169 auto Reg = Op.getReg();
4170 if (Reg.isPhysical())
4171 return AArch64::FPR128RegClass.contains(Reg) ||
4172 AArch64::FPR64RegClass.contains(Reg) ||
4173 AArch64::FPR32RegClass.contains(Reg) ||
4174 AArch64::FPR16RegClass.contains(Reg) ||
4175 AArch64::FPR8RegClass.contains(Reg);
4176
4177 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4178 return TRC == &AArch64::FPR128RegClass ||
4179 TRC == &AArch64::FPR128_loRegClass ||
4180 TRC == &AArch64::FPR64RegClass ||
4181 TRC == &AArch64::FPR64_loRegClass ||
4182 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4183 TRC == &AArch64::FPR8RegClass;
4184 };
4185 return llvm::any_of(MI.operands(), IsFPR);
4186}
4187
4188// Scale the unscaled offset. Returns false if the unscaled offset can't be
4189// scaled.
4190static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4191 int Scale = AArch64InstrInfo::getMemScale(Opc);
4192
4193 // If the byte-offset isn't a multiple of the stride, we can't scale this
4194 // offset.
4195 if (Offset % Scale != 0)
4196 return false;
4197
4198 // Convert the byte offset used by unscaled load/store instructions into an
4199 // "element" offset used by the scaled pair load/store instructions.
4200 Offset /= Scale;
4201 return true;
4202}
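// Worked example (added for illustration): for an unscaled STURXi the stride
// returned by getMemScale is 8, so a byte offset of 24 scales to element
// offset 3, while a byte offset of 20 is rejected as not a multiple of 8.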
4203
4204static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4205 if (FirstOpc == SecondOpc)
4206 return true;
4207 // We can also pair sign-ext and zero-ext instructions.
4208 switch (FirstOpc) {
4209 default:
4210 return false;
4211 case AArch64::STRSui:
4212 case AArch64::STURSi:
4213 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4214 case AArch64::STRDui:
4215 case AArch64::STURDi:
4216 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4217 case AArch64::STRQui:
4218 case AArch64::STURQi:
4219 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4220 case AArch64::STRWui:
4221 case AArch64::STURWi:
4222 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4223 case AArch64::STRXui:
4224 case AArch64::STURXi:
4225 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4226 case AArch64::LDRSui:
4227 case AArch64::LDURSi:
4228 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4229 case AArch64::LDRDui:
4230 case AArch64::LDURDi:
4231 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4232 case AArch64::LDRQui:
4233 case AArch64::LDURQi:
4234 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4235 case AArch64::LDRWui:
4236 case AArch64::LDURWi:
4237 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4238 case AArch64::LDRSWui:
4239 case AArch64::LDURSWi:
4240 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4241 case AArch64::LDRXui:
4242 case AArch64::LDURXi:
4243 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4244 }
4245 // These instructions can't be paired based on their opcodes.
4246 return false;
4247}
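// Added note: the cross-opcode cases above mean a zero-extending 32-bit load
// (LDRWui/LDURWi) and a sign-extending one (LDRSWui/LDURSWi) of adjacent
// words are still considered pairable, as the comment at the top indicates.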
4248
4249static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4250 int64_t Offset1, unsigned Opcode1, int FI2,
4251 int64_t Offset2, unsigned Opcode2) {
4252 // Accesses through fixed stack object frame indices may access a different
4253 // fixed stack slot. Check that the object offsets + offsets match.
4254 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4255 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4256 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4257 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4258 // Convert to scaled object offsets.
4259 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4260 if (ObjectOffset1 % Scale1 != 0)
4261 return false;
4262 ObjectOffset1 /= Scale1;
4263 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4264 if (ObjectOffset2 % Scale2 != 0)
4265 return false;
4266 ObjectOffset2 /= Scale2;
4267 ObjectOffset1 += Offset1;
4268 ObjectOffset2 += Offset2;
4269 return ObjectOffset1 + 1 == ObjectOffset2;
4270 }
4271
4272 return FI1 == FI2;
4273}
4274
4275/// Detect opportunities for ldp/stp formation.
4276///
4277/// Only called for LdSt for which getMemOperandWithOffset returns true.
4279 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4280 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4281 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4282 unsigned NumBytes) const {
4283 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4284 const MachineOperand &BaseOp1 = *BaseOps1.front();
4285 const MachineOperand &BaseOp2 = *BaseOps2.front();
4286 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4287 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4288 if (BaseOp1.getType() != BaseOp2.getType())
4289 return false;
4290
4291 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4292 "Only base registers and frame indices are supported.");
4293
4294 // Check for both base regs and base FI.
4295 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4296 return false;
4297
4298 // Only cluster up to a single pair.
4299 if (ClusterSize > 2)
4300 return false;
4301
4302 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4303 return false;
4304
4305 // Can we pair these instructions based on their opcodes?
4306 unsigned FirstOpc = FirstLdSt.getOpcode();
4307 unsigned SecondOpc = SecondLdSt.getOpcode();
4308 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4309 return false;
4310
4311 // Can't merge volatiles or load/stores that have a hint to avoid pair
4312 // formation, for example.
4313 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4314 !isCandidateToMergeOrPair(SecondLdSt))
4315 return false;
4316
4317 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4318 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4319 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4320 return false;
4321
4322 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4323 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4324 return false;
4325
4326 // Pairwise instructions have a 7-bit signed offset field.
4327 if (Offset1 > 63 || Offset1 < -64)
4328 return false;
4329
4330 // The caller should already have ordered First/SecondLdSt by offset.
4331 // Note: this need not hold when the bases are different frame indices.
4332 if (BaseOp1.isFI()) {
4333 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4334 "Caller should have ordered offsets.");
4335
4336 const MachineFrameInfo &MFI =
4337 FirstLdSt.getParent()->getParent()->getFrameInfo();
4338 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4339 BaseOp2.getIndex(), Offset2, SecondOpc);
4340 }
4341
4342 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4343
4344 return Offset1 + 1 == Offset2;
4345}
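// Worked example (added for illustration): two LDRXui loads from [x1, #8] and
// [x1, #16] carry scaled immediates 1 and 2, so Offset1 + 1 == Offset2 holds
// and the pair is clustered; candidates are rejected if ClusterSize exceeds 2
// or the first scaled offset falls outside the signed 7-bit range [-64, 63].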
4346
4348 unsigned Reg, unsigned SubIdx,
4349 unsigned State,
4350 const TargetRegisterInfo *TRI) {
4351 if (!SubIdx)
4352 return MIB.addReg(Reg, State);
4353
4355 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4356 return MIB.addReg(Reg, State, SubIdx);
4357}
4358
4359static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4360 unsigned NumRegs) {
4361 // We really want the positive remainder mod 32 here, which happens to be
4362 // easily obtainable with a mask.
4363 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4364}
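// Worked example (added for illustration): with NumRegs == 2 and a destination
// tuple starting one register above the source (DestReg - SrcReg == 1), the
// result is 1 < 2, i.e. a forward copy would overwrite the overlapping source
// sub-register before it is read, so copyPhysRegTuple copies in reverse order.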
4365
4368 const DebugLoc &DL, MCRegister DestReg,
4369 MCRegister SrcReg, bool KillSrc,
4370 unsigned Opcode,
4371 ArrayRef<unsigned> Indices) const {
4372 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4374 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4375 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4376 unsigned NumRegs = Indices.size();
4377
4378 int SubReg = 0, End = NumRegs, Incr = 1;
4379 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4380 SubReg = NumRegs - 1;
4381 End = -1;
4382 Incr = -1;
4383 }
4384
4385 for (; SubReg != End; SubReg += Incr) {
4386 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4387 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4388 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4389 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4390 }
4391}
4392
4395 DebugLoc DL, unsigned DestReg,
4396 unsigned SrcReg, bool KillSrc,
4397 unsigned Opcode, unsigned ZeroReg,
4398 llvm::ArrayRef<unsigned> Indices) const {
4400 unsigned NumRegs = Indices.size();
4401
4402#ifndef NDEBUG
4403 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4404 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4405 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4406 "GPR reg sequences should not be able to overlap");
4407#endif
4408
4409 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4410 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4411 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4412 MIB.addReg(ZeroReg);
4413 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4414 MIB.addImm(0);
4415 }
4416}
4417
4420 const DebugLoc &DL, MCRegister DestReg,
4421 MCRegister SrcReg, bool KillSrc) const {
4422 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4423 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4425
4426 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4427 // If either operand is WSP, expand to ADD #0.
4428 if (Subtarget.hasZeroCycleRegMove()) {
4429 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4430 MCRegister DestRegX = TRI->getMatchingSuperReg(
4431 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4432 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4433 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4434 // This instruction is reading and writing X registers. This may upset
4435 // the register scavenger and machine verifier, so we need to indicate
4436 // that we are reading an undefined value from SrcRegX, but a proper
4437 // value from SrcReg.
4438 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4439 .addReg(SrcRegX, RegState::Undef)
4440 .addImm(0)
4442 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4443 } else {
4444 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4445 .addReg(SrcReg, getKillRegState(KillSrc))
4446 .addImm(0)
4448 }
4449 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4450 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4451 .addImm(0)
4453 } else {
4454 if (Subtarget.hasZeroCycleRegMove()) {
4455 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4456 MCRegister DestRegX = TRI->getMatchingSuperReg(
4457 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4458 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4459 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4460 // This instruction is reading and writing X registers. This may upset
4461 // the register scavenger and machine verifier, so we need to indicate
4462 // that we are reading an undefined value from SrcRegX, but a proper
4463 // value from SrcReg.
4464 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4465 .addReg(AArch64::XZR)
4466 .addReg(SrcRegX, RegState::Undef)
4467 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4468 } else {
4469 // Otherwise, expand to ORR WZR.
4470 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4471 .addReg(AArch64::WZR)
4472 .addReg(SrcReg, getKillRegState(KillSrc));
4473 }
4474 }
4475 return;
4476 }
4477
4478 // Copy a Predicate register by ORRing with itself.
4479 if (AArch64::PPRRegClass.contains(DestReg) &&
4480 AArch64::PPRRegClass.contains(SrcReg)) {
4481 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4482 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4483 .addReg(SrcReg) // Pg
4484 .addReg(SrcReg)
4485 .addReg(SrcReg, getKillRegState(KillSrc));
4486 return;
4487 }
4488
4489 // Copy a predicate-as-counter register by ORRing with itself as if it
4490 // were a regular predicate (mask) register.
4491 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4492 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4493 if (DestIsPNR || SrcIsPNR) {
4494 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4495 "Unexpected predicate-as-counter register.");
4496 auto ToPPR = [](MCRegister R) -> MCRegister {
4497 return (R - AArch64::PN0) + AArch64::P0;
4498 };
4499 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4500 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4501
4502 if (PPRSrcReg != PPRDestReg) {
4503 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4504 .addReg(PPRSrcReg) // Pg
4505 .addReg(PPRSrcReg)
4506 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4507 if (DestIsPNR)
4508 NewMI.addDef(DestReg, RegState::Implicit);
4509 }
4510 return;
4511 }
4512
4513 // Copy a Z register by ORRing with itself.
4514 if (AArch64::ZPRRegClass.contains(DestReg) &&
4515 AArch64::ZPRRegClass.contains(SrcReg)) {
4516 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4517 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4518 .addReg(SrcReg)
4519 .addReg(SrcReg, getKillRegState(KillSrc));
4520 return;
4521 }
4522
4523 // Copy a Z register pair by copying the individual sub-registers.
4524 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4525 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4526 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4527 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4528 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4529 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4530 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4531 Indices);
4532 return;
4533 }
4534
4535 // Copy a Z register triple by copying the individual sub-registers.
4536 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4537 AArch64::ZPR3RegClass.contains(SrcReg)) {
4538 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4539 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4540 AArch64::zsub2};
4541 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4542 Indices);
4543 return;
4544 }
4545
4546 // Copy a Z register quad by copying the individual sub-registers.
4547 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4548 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4549 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4550 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4551 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4552 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4553 AArch64::zsub2, AArch64::zsub3};
4554 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4555 Indices);
4556 return;
4557 }
4558
4559 if (AArch64::GPR64spRegClass.contains(DestReg) &&
4560 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
4561 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
4562 // If either operand is SP, expand to ADD #0.
4563 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
4564 .addReg(SrcReg, getKillRegState(KillSrc))
4565 .addImm(0)
4567 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
4568 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
4569 .addImm(0)
4571 } else {
4572 // Otherwise, expand to ORR XZR.
4573 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
4574 .addReg(AArch64::XZR)
4575 .addReg(SrcReg, getKillRegState(KillSrc));
4576 }
4577 return;
4578 }
4579
4580 // Copy a DDDD register quad by copying the individual sub-registers.
4581 if (AArch64::DDDDRegClass.contains(DestReg) &&
4582 AArch64::DDDDRegClass.contains(SrcReg)) {
4583 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4584 AArch64::dsub2, AArch64::dsub3};
4585 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4586 Indices);
4587 return;
4588 }
4589
4590 // Copy a DDD register triple by copying the individual sub-registers.
4591 if (AArch64::DDDRegClass.contains(DestReg) &&
4592 AArch64::DDDRegClass.contains(SrcReg)) {
4593 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4594 AArch64::dsub2};
4595 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4596 Indices);
4597 return;
4598 }
4599
4600 // Copy a DD register pair by copying the individual sub-registers.
4601 if (AArch64::DDRegClass.contains(DestReg) &&
4602 AArch64::DDRegClass.contains(SrcReg)) {
4603 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
4604 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4605 Indices);
4606 return;
4607 }
4608
4609 // Copy a QQQQ register quad by copying the individual sub-registers.
4610 if (AArch64::QQQQRegClass.contains(DestReg) &&
4611 AArch64::QQQQRegClass.contains(SrcReg)) {
4612 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4613 AArch64::qsub2, AArch64::qsub3};
4614 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4615 Indices);
4616 return;
4617 }
4618
4619 // Copy a QQQ register triple by copying the individual sub-registers.
4620 if (AArch64::QQQRegClass.contains(DestReg) &&
4621 AArch64::QQQRegClass.contains(SrcReg)) {
4622 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4623 AArch64::qsub2};
4624 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4625 Indices);
4626 return;
4627 }
4628
4629 // Copy a QQ register pair by copying the individual sub-registers.
4630 if (AArch64::QQRegClass.contains(DestReg) &&
4631 AArch64::QQRegClass.contains(SrcReg)) {
4632 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
4633 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4634 Indices);
4635 return;
4636 }
4637
4638 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
4639 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
4640 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
4641 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
4642 AArch64::XZR, Indices);
4643 return;
4644 }
4645
4646 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
4647 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
4648 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
4649 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
4650 AArch64::WZR, Indices);
4651 return;
4652 }
4653
4654 if (AArch64::FPR128RegClass.contains(DestReg) &&
4655 AArch64::FPR128RegClass.contains(SrcReg)) {
4656 if (Subtarget.hasSVEorSME() && !Subtarget.isNeonAvailable())
4657 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
4658 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
4659 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
4660 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
4661 else if (Subtarget.hasNEON())
4662 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
4663 .addReg(SrcReg)
4664 .addReg(SrcReg, getKillRegState(KillSrc));
4665 else {
4666 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
4667 .addReg(AArch64::SP, RegState::Define)
4668 .addReg(SrcReg, getKillRegState(KillSrc))
4669 .addReg(AArch64::SP)
4670 .addImm(-16);
4671 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
4672 .addReg(AArch64::SP, RegState::Define)
4673 .addReg(DestReg, RegState::Define)
4674 .addReg(AArch64::SP)
4675 .addImm(16);
4676 }
4677 return;
4678 }
4679
4680 if (AArch64::FPR64RegClass.contains(DestReg) &&
4681 AArch64::FPR64RegClass.contains(SrcReg)) {
4682 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
4683 .addReg(SrcReg, getKillRegState(KillSrc));
4684 return;
4685 }
4686
4687 if (AArch64::FPR32RegClass.contains(DestReg) &&
4688 AArch64::FPR32RegClass.contains(SrcReg)) {
4689 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4690 .addReg(SrcReg, getKillRegState(KillSrc));
4691 return;
4692 }
4693
4694 if (AArch64::FPR16RegClass.contains(DestReg) &&
4695 AArch64::FPR16RegClass.contains(SrcReg)) {
4696 DestReg =
4697 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
4698 SrcReg =
4699 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
4700 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4701 .addReg(SrcReg, getKillRegState(KillSrc));
4702 return;
4703 }
4704
4705 if (AArch64::FPR8RegClass.contains(DestReg) &&
4706 AArch64::FPR8RegClass.contains(SrcReg)) {
4707 DestReg =
4708 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
4709 SrcReg =
4710 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
4711 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4712 .addReg(SrcReg, getKillRegState(KillSrc));
4713 return;
4714 }
4715
4716 // Copies between GPR64 and FPR64.
4717 if (AArch64::FPR64RegClass.contains(DestReg) &&
4718 AArch64::GPR64RegClass.contains(SrcReg)) {
4719 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
4720 .addReg(SrcReg, getKillRegState(KillSrc));
4721 return;
4722 }
4723 if (AArch64::GPR64RegClass.contains(DestReg) &&
4724 AArch64::FPR64RegClass.contains(SrcReg)) {
4725 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
4726 .addReg(SrcReg, getKillRegState(KillSrc));
4727 return;
4728 }
4729 // Copies between GPR32 and FPR32.
4730 if (AArch64::FPR32RegClass.contains(DestReg) &&
4731 AArch64::GPR32RegClass.contains(SrcReg)) {
4732 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
4733 .addReg(SrcReg, getKillRegState(KillSrc));
4734 return;
4735 }
4736 if (AArch64::GPR32RegClass.contains(DestReg) &&
4737 AArch64::FPR32RegClass.contains(SrcReg)) {
4738 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
4739 .addReg(SrcReg, getKillRegState(KillSrc));
4740 return;
4741 }
4742
4743 if (DestReg == AArch64::NZCV) {
4744 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
4745 BuildMI(MBB, I, DL, get(AArch64::MSR))
4746 .addImm(AArch64SysReg::NZCV)
4747 .addReg(SrcReg, getKillRegState(KillSrc))
4748 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
4749 return;
4750 }
4751
4752 if (SrcReg == AArch64::NZCV) {
4753 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
4754 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
4755 .addImm(AArch64SysReg::NZCV)
4756 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
4757 return;
4758 }
4759
4760#ifndef NDEBUG
4762 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
4763 << TRI.getRegAsmName(SrcReg) << "\n";
4764#endif
4765 llvm_unreachable("unimplemented reg-to-reg copy");
4766}
4767
4770 MachineBasicBlock::iterator InsertBefore,
4771 const MCInstrDesc &MCID,
4772 Register SrcReg, bool IsKill,
4773 unsigned SubIdx0, unsigned SubIdx1, int FI,
4774 MachineMemOperand *MMO) {
4775 Register SrcReg0 = SrcReg;
4776 Register SrcReg1 = SrcReg;
4777 if (SrcReg.isPhysical()) {
4778 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
4779 SubIdx0 = 0;
4780 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
4781 SubIdx1 = 0;
4782 }
4783 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4784 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
4785 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
4786 .addFrameIndex(FI)
4787 .addImm(0)
4788 .addMemOperand(MMO);
4789}
4790
4793 Register SrcReg, bool isKill, int FI,
4794 const TargetRegisterClass *RC,
4795 const TargetRegisterInfo *TRI,
4796 Register VReg) const {
4797 MachineFunction &MF = *MBB.getParent();
4798 MachineFrameInfo &MFI = MF.getFrameInfo();
4799
4801 MachineMemOperand *MMO =
4803 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4804 unsigned Opc = 0;
4805 bool Offset = true;
4807 unsigned StackID = TargetStackID::Default;
4808 switch (TRI->getSpillSize(*RC)) {
4809 case 1:
4810 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4811 Opc = AArch64::STRBui;
4812 break;
4813 case 2:
4814 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4815 Opc = AArch64::STRHui;
4816 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
4817 assert(Subtarget.hasSVEorSME() &&
4818 "Unexpected register store without SVE store instructions");
4819 Opc = AArch64::STR_PXI;
4821 } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) {
4822 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4823 "Unexpected register store without SVE2p1 or SME2");
4824 if (SrcReg.isVirtual()) {
4825 auto NewSrcReg =
4826 MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
4827 BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), NewSrcReg)
4828 .addReg(SrcReg);
4829 SrcReg = NewSrcReg;
4830 } else
4831 SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0;
4832 Opc = AArch64::STR_PXI;
4834 }
4835 break;
4836 case 4:
4837 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
4838 Opc = AArch64::STRWui;
4839 if (SrcReg.isVirtual())
4840 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
4841 else
4842 assert(SrcReg != AArch64::WSP);
4843 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
4844 Opc = AArch64::STRSui;
4845 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
4846 Opc = AArch64::STR_PPXI;
4848 }
4849 break;
4850 case 8:
4851 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
4852 Opc = AArch64::STRXui;
4853 if (SrcReg.isVirtual())
4854 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4855 else
4856 assert(SrcReg != AArch64::SP);
4857 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
4858 Opc = AArch64::STRDui;
4859 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
4861 get(AArch64::STPWi), SrcReg, isKill,
4862 AArch64::sube32, AArch64::subo32, FI, MMO);
4863 return;
4864 }
4865 break;
4866 case 16:
4867 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
4868 Opc = AArch64::STRQui;
4869 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
4870 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4871 Opc = AArch64::ST1Twov1d;
4872 Offset = false;
4873 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
4875 get(AArch64::STPXi), SrcReg, isKill,
4876 AArch64::sube64, AArch64::subo64, FI, MMO);
4877 return;
4878 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4879 assert(Subtarget.hasSVEorSME() &&
4880 "Unexpected register store without SVE store instructions");
4881 Opc = AArch64::STR_ZXI;
4883 }
4884 break;
4885 case 24:
4886 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4887 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4888 Opc = AArch64::ST1Threev1d;
4889 Offset = false;
4890 }
4891 break;
4892 case 32:
4893 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4894 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4895 Opc = AArch64::ST1Fourv1d;
4896 Offset = false;
4897 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4898 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4899 Opc = AArch64::ST1Twov2d;
4900 Offset = false;
4901 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4902 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4903 assert(Subtarget.hasSVEorSME() &&
4904 "Unexpected register store without SVE store instructions");
4905 Opc = AArch64::STR_ZZXI;
4907 }
4908 break;
4909 case 48:
4910 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4911 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4912 Opc = AArch64::ST1Threev2d;
4913 Offset = false;
4914 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4915 assert(Subtarget.hasSVEorSME() &&
4916 "Unexpected register store without SVE store instructions");
4917 Opc = AArch64::STR_ZZZXI;
4919 }
4920 break;
4921 case 64:
4922 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4923 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4924 Opc = AArch64::ST1Fourv2d;
4925 Offset = false;
4926 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4927 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4928 assert(Subtarget.hasSVEorSME() &&
4929 "Unexpected register store without SVE store instructions");
4930 Opc = AArch64::STR_ZZZZXI;
4932 }
4933 break;
4934 }
4935 assert(Opc && "Unknown register class");
4936 MFI.setStackID(FI, StackID);
4937
4938 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
4939 .addReg(SrcReg, getKillRegState(isKill))
4940 .addFrameIndex(FI);
4941
4942 if (Offset)
4943 MI.addImm(0);
4944 if (PNRReg.isValid())
4945 MI.addDef(PNRReg, RegState::Implicit);
4946 MI.addMemOperand(MMO);
4947}
4948
4951 MachineBasicBlock::iterator InsertBefore,
4952 const MCInstrDesc &MCID,
4953 Register DestReg, unsigned SubIdx0,
4954 unsigned SubIdx1, int FI,
4955 MachineMemOperand *MMO) {
4956 Register DestReg0 = DestReg;
4957 Register DestReg1 = DestReg;
4958 bool IsUndef = true;
4959 if (DestReg.isPhysical()) {
4960 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
4961 SubIdx0 = 0;
4962 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
4963 SubIdx1 = 0;
4964 IsUndef = false;
4965 }
4966 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4967 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
4968 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
4969 .addFrameIndex(FI)
4970 .addImm(0)
4971 .addMemOperand(MMO);
4972}
4973
4976 Register DestReg, int FI,
4977 const TargetRegisterClass *RC,
4978 const TargetRegisterInfo *TRI,
4979 Register VReg) const {
4980 MachineFunction &MF = *MBB.getParent();
4981 MachineFrameInfo &MFI = MF.getFrameInfo();
4983 MachineMemOperand *MMO =
4985 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4986
4987 unsigned Opc = 0;
4988 bool Offset = true;
4989 unsigned StackID = TargetStackID::Default;
4991 switch (TRI->getSpillSize(*RC)) {
4992 case 1:
4993 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4994 Opc = AArch64::LDRBui;
4995 break;
4996 case 2:
4997 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4998 Opc = AArch64::LDRHui;
4999 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5000 assert(Subtarget.hasSVEorSME() &&
5001 "Unexpected register load without SVE load instructions");
5002 Opc = AArch64::LDR_PXI;
5004 } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) {
5005 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
5006 "Unexpected register load without SVE2p1 or SME2");
5007 PNRReg = DestReg;
5008 if (DestReg.isVirtual())
5009 DestReg = MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
5010 else
5011 DestReg = (DestReg - AArch64::PN0) + AArch64::P0;
5012 Opc = AArch64::LDR_PXI;
5014 }
5015 break;
5016 case 4:
5017 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5018 Opc = AArch64::LDRWui;
5019 if (DestReg.isVirtual())
5020 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5021 else
5022 assert(DestReg != AArch64::WSP);
5023 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5024 Opc = AArch64::LDRSui;
5025 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5026 Opc = AArch64::LDR_PPXI;
5028 }
5029 break;
5030 case 8:
5031 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5032 Opc = AArch64::LDRXui;
5033 if (DestReg.isVirtual())
5034 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5035 else
5036 assert(DestReg != AArch64::SP);
5037 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5038 Opc = AArch64::LDRDui;
5039 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5041 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5042 AArch64::subo32, FI, MMO);
5043 return;
5044 }
5045 break;
5046 case 16:
5047 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5048 Opc = AArch64::LDRQui;
5049 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5050 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5051 Opc = AArch64::LD1Twov1d;
5052 Offset = false;
5053 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5055 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5056 AArch64::subo64, FI, MMO);
5057 return;
5058 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5059 assert(Subtarget.hasSVEorSME() &&
5060 "Unexpected register load without SVE load instructions");
5061 Opc = AArch64::LDR_ZXI;
5063 }
5064 break;
5065 case 24:
5066 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5067 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5068 Opc = AArch64::LD1Threev1d;
5069 Offset = false;
5070 }
5071 break;
5072 case 32:
5073 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5074 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5075 Opc = AArch64::LD1Fourv1d;
5076 Offset = false;
5077 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5078 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5079 Opc = AArch64::LD1Twov2d;
5080 Offset = false;
5081 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5082 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5083 assert(Subtarget.hasSVEorSME() &&
5084 "Unexpected register load without SVE load instructions");
5085 Opc = AArch64::LDR_ZZXI;
5087 }
5088 break;
5089 case 48:
5090 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5091 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5092 Opc = AArch64::LD1Threev2d;
5093 Offset = false;
5094 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5095 assert(Subtarget.hasSVEorSME() &&
5096 "Unexpected register load without SVE load instructions");
5097 Opc = AArch64::LDR_ZZZXI;
5099 }
5100 break;
5101 case 64:
5102 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5103 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5104 Opc = AArch64::LD1Fourv2d;
5105 Offset = false;
5106 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5107 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5108 assert(Subtarget.hasSVEorSME() &&
5109 "Unexpected register load without SVE load instructions");
5110 Opc = AArch64::LDR_ZZZZXI;
5112 }
5113 break;
5114 }
5115
5116 assert(Opc && "Unknown register class");
5117 MFI.setStackID(FI, StackID);
5118
5119 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5120 .addReg(DestReg, getDefRegState(true))
5121 .addFrameIndex(FI);
5122 if (Offset)
5123 MI.addImm(0);
5124 if (PNRReg.isValid() && !PNRReg.isVirtual())
5125 MI.addDef(PNRReg, RegState::Implicit);
5126 MI.addMemOperand(MMO);
5127
5128 if (PNRReg.isValid() && PNRReg.isVirtual())
5129 BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
5130 .addReg(DestReg);
5131}
5132
5134 const MachineInstr &UseMI,
5135 const TargetRegisterInfo *TRI) {
5136 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5137 UseMI.getIterator()),
5138 [TRI](const MachineInstr &I) {
5139 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5140 I.readsRegister(AArch64::NZCV, TRI);
5141 });
5142}
5143
5145 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5146 // The smallest scalable element supported by scaled SVE addressing
5147 // modes is a predicate, which is 2 scalable bytes in size. So the scalable
5148 // byte offset must always be a multiple of 2.
5149 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5150
5151 // VGSized offsets are divided by '2', because the VG register is the
5152 // number of 64-bit granules as opposed to 128-bit vector chunks,
5153 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5154 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5155 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5156 ByteSized = Offset.getFixed();
5157 VGSized = Offset.getScalable() / 2;
5158}
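// Worked example (added for illustration): StackOffset::get(16, 32), i.e. 16
// fixed bytes plus 32 scalable bytes, decomposes into ByteSized = 16 and
// VGSized = 32 / 2 = 16, so the frame offset can later be described in DWARF
// as 16 + 16 * VG.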
5159
5160/// Returns the offset in parts to which this frame offset can be
5161/// decomposed for the purpose of describing a frame offset.
5162/// For non-scalable offsets this is simply its byte size.
5164 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5165 int64_t &NumDataVectors) {
5166 // The smallest scalable element supported by scaled SVE addressing
5167 // modes is a predicate, which is 2 scalable bytes in size. So the scalable
5168 // byte offset must always be a multiple of 2.
5169 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5170
5171 NumBytes = Offset.getFixed();
5172 NumDataVectors = 0;
5173 NumPredicateVectors = Offset.getScalable() / 2;
5174 // This method is used to get the offsets to adjust the frame offset.
5175 // If the function requires ADDPL to be used and needs more than two ADDPL
5176 // instructions, part of the offset is folded into NumDataVectors so that it
5177 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
5178 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5179 NumPredicateVectors > 62) {
5180 NumDataVectors = NumPredicateVectors / 8;
5181 NumPredicateVectors -= NumDataVectors * 8;
5182 }
5183}
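// Worked example (added for illustration): a scalable offset of 160 bytes
// gives NumPredicateVectors = 80, which is folded into NumDataVectors = 10
// (a single ADDVL) with no ADDPL needed, whereas a scalable offset of 34
// bytes stays as NumPredicateVectors = 17 and is handled by one ADDPL.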
5184
5185// Convenience function to create a DWARF expression for
5186// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
5187static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5188 int NumVGScaledBytes, unsigned VG,
5189 llvm::raw_string_ostream &Comment) {
5190 uint8_t buffer[16];
5191
5192 if (NumBytes) {
5193 Expr.push_back(dwarf::DW_OP_consts);
5194 Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
5195 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5196 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5197 }
5198
5199 if (NumVGScaledBytes) {
5200 Expr.push_back((uint8_t)dwarf::DW_OP_consts);
5201 Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
5202
5203 Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
5204 Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
5205 Expr.push_back(0);
5206
5207 Expr.push_back((uint8_t)dwarf::DW_OP_mul);
5208 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5209
5210 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5211 << std::abs(NumVGScaledBytes) << " * VG";
5212 }
5213}
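// Added note: for NumBytes = 16 and NumVGScaledBytes = 32 the bytes appended
// above are DW_OP_consts 16, DW_OP_plus, DW_OP_consts 32, DW_OP_bregx VG 0,
// DW_OP_mul, DW_OP_plus, and the comment stream reads " + 16 + 32 * VG".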
5214
5215// Creates an MCCFIInstruction:
5216// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5218 unsigned Reg,
5219 const StackOffset &Offset) {
5220 int64_t NumBytes, NumVGScaledBytes;
5222 NumVGScaledBytes);
5223 std::string CommentBuffer;
5224 llvm::raw_string_ostream Comment(CommentBuffer);
5225
5226 if (Reg == AArch64::SP)
5227 Comment << "sp";
5228 else if (Reg == AArch64::FP)
5229 Comment << "fp";
5230 else
5231 Comment << printReg(Reg, &TRI);
5232
5233 // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5234 SmallString<64> Expr;
5235 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5236 Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5237 Expr.push_back(0);
5238 appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5239 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5240
5241 // Wrap this into DW_CFA_def_cfa_expression.
5242 SmallString<64> DefCfaExpr;
5243 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
5244 uint8_t buffer[16];
5245 DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
5246 DefCfaExpr.append(Expr.str());
5247 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
5248 Comment.str());
5249}
5250
5252 unsigned FrameReg, unsigned Reg,
5253 const StackOffset &Offset,
5254 bool LastAdjustmentWasScalable) {
5255 if (Offset.getScalable())
5256 return createDefCFAExpression(TRI, Reg, Offset);
5257
5258 if (FrameReg == Reg && !LastAdjustmentWasScalable)
5259 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
5260
5261 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5262 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
5263}
5264
5266 unsigned Reg,
5267 const StackOffset &OffsetFromDefCFA) {
5268 int64_t NumBytes, NumVGScaledBytes;
5270 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
5271
5272 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5273
5274 // Non-scalable offsets can use DW_CFA_offset directly.
5275 if (!NumVGScaledBytes)
5276 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
5277
5278 std::string CommentBuffer;
5279 llvm::raw_string_ostream Comment(CommentBuffer);
5280 Comment << printReg(Reg, &TRI) << " @ cfa";
5281
5282 // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5283 SmallString<64> OffsetExpr;
5284 appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
5285 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5286
5287 // Wrap this into DW_CFA_expression
5288 SmallString<64> CfaExpr;
5289 CfaExpr.push_back(dwarf::DW_CFA_expression);
5290 uint8_t buffer[16];
5291 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
5292 CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
5293 CfaExpr.append(OffsetExpr.str());
5294
5295 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
5296 Comment.str());
5297}
5298
5299// Helper function to emit a frame offset adjustment from a given
5300// pointer (SrcReg), stored into DestReg. This function is explicit
5301// in that it requires the opcode.
5304 const DebugLoc &DL, unsigned DestReg,
5305 unsigned SrcReg, int64_t Offset, unsigned Opc,
5306 const TargetInstrInfo *TII,
5307 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5308 bool *HasWinCFI, bool EmitCFAOffset,
5309 StackOffset CFAOffset, unsigned FrameReg) {
5310 int Sign = 1;
5311 unsigned MaxEncoding, ShiftSize;
5312 switch (Opc) {
5313 case AArch64::ADDXri:
5314 case AArch64::ADDSXri:
5315 case AArch64::SUBXri:
5316 case AArch64::SUBSXri:
5317 MaxEncoding = 0xfff;
5318 ShiftSize = 12;
5319 break;
5320 case AArch64::ADDVL_XXI:
5321 case AArch64::ADDPL_XXI:
5322 case AArch64::ADDSVL_XXI:
5323 case AArch64::ADDSPL_XXI:
5324 MaxEncoding = 31;
5325 ShiftSize = 0;
5326 if (Offset < 0) {
5327 MaxEncoding = 32;
5328 Sign = -1;
5329 Offset = -Offset;
5330 }
5331 break;
5332 default:
5333 llvm_unreachable("Unsupported opcode");
5334 }
5335
5336 // `Offset` can be in bytes or in "scalable bytes".
5337 int VScale = 1;
5338 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
5339 VScale = 16;
5340 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
5341 VScale = 2;
5342
5343 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
5344 // scratch register. If DestReg is a virtual register, use it as the
5345 // scratch register; otherwise, create a new virtual register (to be
5346 // replaced by the scavenger at the end of PEI). That case can be optimized
5347 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
5348 // register can be loaded with offset%8 and the add/sub can use an extending
5349 // instruction with LSL#3.
5350 // Currently the function handles any offsets but generates a poor sequence
5351 // of code.
5352 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
5353
5354 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
5355 Register TmpReg = DestReg;
5356 if (TmpReg == AArch64::XZR)
5358 &AArch64::GPR64RegClass);
5359 do {
5360 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
5361 unsigned LocalShiftSize = 0;
5362 if (ThisVal > MaxEncoding) {
5363 ThisVal = ThisVal >> ShiftSize;
5364 LocalShiftSize = ShiftSize;
5365 }
5366 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
5367 "Encoding cannot handle value that big");
5368
5369 Offset -= ThisVal << LocalShiftSize;
5370 if (Offset == 0)
5371 TmpReg = DestReg;
5372 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
5373 .addReg(SrcReg)
5374 .addImm(Sign * (int)ThisVal);
5375 if (ShiftSize)
5376 MBI = MBI.addImm(
5378 MBI = MBI.setMIFlag(Flag);
5379
5380 auto Change =
5381 VScale == 1
5382 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
5383 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
5384 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
5385 CFAOffset += Change;
5386 else
5387 CFAOffset -= Change;
5388 if (EmitCFAOffset && DestReg == TmpReg) {
5389 MachineFunction &MF = *MBB.getParent();
5390 const TargetSubtargetInfo &STI = MF.getSubtarget();
5391 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
5392
5393 unsigned CFIIndex = MF.addFrameInst(
5394 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
5395 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
5396 .addCFIIndex(CFIIndex)
5397 .setMIFlags(Flag);
5398 }
5399
5400 if (NeedsWinCFI) {
5401 assert(Sign == 1 && "SEH directives should always have a positive sign");
5402 int Imm = (int)(ThisVal << LocalShiftSize);
5403 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
5404 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
5405 if (HasWinCFI)
5406 *HasWinCFI = true;
5407 if (Imm == 0)
5408 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
5409 else
5410 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
5411 .addImm(Imm)
5412 .setMIFlag(Flag);
5413 assert(Offset == 0 && "Expected remaining offset to be zero to "
5414 "emit a single SEH directive");
5415 } else if (DestReg == AArch64::SP) {
5416 if (HasWinCFI)
5417 *HasWinCFI = true;
5418 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
5419 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
5420 .addImm(Imm)
5421 .setMIFlag(Flag);
5422 }
5423 }
5424
5425 SrcReg = TmpReg;
5426 } while (Offset);
5427}
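// Worked example (added for illustration): lowering an SP decrement of 4100
// bytes with SUBXri takes two iterations of the loop above, emitting
// "sub sp, sp, #1, lsl #12" followed by "sub sp, sp, #4"; when EmitCFAOffset
// is set, a CFI instruction tracking the running CFA offset follows each
// emitted add/sub.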
5428
5431 unsigned DestReg, unsigned SrcReg,
5433 MachineInstr::MIFlag Flag, bool SetNZCV,
5434 bool NeedsWinCFI, bool *HasWinCFI,
5435 bool EmitCFAOffset, StackOffset CFAOffset,
5436 unsigned FrameReg) {
5437 // If a function is marked as arm_locally_streaming, then the runtime value of
5438 // vscale in the prologue/epilogue is different from the runtime value of vscale
5439 // in the function's body. To avoid having to consider multiple vscales,
5440 // we can use `addsvl` to allocate any scalable stack-slots, which under
5441 // most circumstances will be only locals, not callee-save slots.
5442 const Function &F = MBB.getParent()->getFunction();
5443 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
5444
5445 int64_t Bytes, NumPredicateVectors, NumDataVectors;
5446 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5447 Offset, Bytes, NumPredicateVectors, NumDataVectors);
5448
5449 // First emit non-scalable frame offsets, or a simple 'mov'.
5450 if (Bytes || (!Offset && SrcReg != DestReg)) {
5451 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
5452 "SP increment/decrement not 8-byte aligned");
5453 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
5454 if (Bytes < 0) {
5455 Bytes = -Bytes;
5456 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
5457 }
5458 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
5459 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
5460 FrameReg);
5461 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
5462 ? StackOffset::getFixed(-Bytes)
5463 : StackOffset::getFixed(Bytes);
5464 SrcReg = DestReg;
5465 FrameReg = DestReg;
5466 }
5467
5468 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
5469 "SetNZCV not supported with SVE vectors");
5470 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
5471 "WinCFI not supported with SVE vectors");
5472
5473 if (NumDataVectors) {
5474 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
5475 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
5476 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5477 CFAOffset, FrameReg);
5478 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
5479 SrcReg = DestReg;
5480 }
5481
5482 if (NumPredicateVectors) {
5483 assert(DestReg != AArch64::SP && "Unaligned access to SP");
5484 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
5485 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
5486 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5487 CFAOffset, FrameReg);
5488 }
5489}
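// Illustrative example (offsets assumed): a StackOffset of 16 fixed bytes plus
// 32 scalable bytes (two SVE data vectors) applied to SP decomposes into
// something like
//   add sp, sp, #16      // fixed part, emitted first
//   addvl sp, sp, #2     // scalable part, two vector registers' worth of bytes
// and in a function with the aarch64_pstate_sm_body attribute the second
// instruction would instead be addsvl, so the streaming vector length is used.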
5490
5491MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
5492 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
5493 MachineBasicBlock::iterator InsertPt, int FrameIndex,
5494 LiveIntervals *LIS, VirtRegMap *VRM) const {
5495 // This is a bit of a hack. Consider this instruction:
5496 //
5497 // %0 = COPY %sp; GPR64all:%0
5498 //
5499 // We explicitly chose GPR64all for the virtual register so such a copy might
5500 // be eliminated by RegisterCoalescer. However, that may not be possible, and
5501 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
5502 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
5503 //
5504 // To prevent that, we are going to constrain the %0 register class here.
5505 if (MI.isFullCopy()) {
5506 Register DstReg = MI.getOperand(0).getReg();
5507 Register SrcReg = MI.getOperand(1).getReg();
5508 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
5509 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
5510 return nullptr;
5511 }
5512 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
5513 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5514 return nullptr;
5515 }
5516 // Nothing can be folded with a copy from/to NZCV.
5517 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
5518 return nullptr;
5519 }
5520
5521 // Handle the case where a copy is being spilled or filled but the source
5522 // and destination register classes don't match. For example:
5523 //
5524 // %0 = COPY %xzr; GPR64common:%0
5525 //
5526 // In this case we can still safely fold away the COPY and generate the
5527 // following spill code:
5528 //
5529 // STRXui %xzr, %stack.0
5530 //
5531 // This also eliminates spilled cross register class COPYs (e.g. between x and
5532 // d regs) of the same size. For example:
5533 //
5534 // %0 = COPY %1; GPR64:%0, FPR64:%1
5535 //
5536 // will be filled as
5537 //
5538 // LDRDui %0, fi<#0>
5539 //
5540 // instead of
5541 //
5542 // LDRXui %Temp, fi<#0>
5543 // %0 = FMOV %Temp
5544 //
5545 if (MI.isCopy() && Ops.size() == 1 &&
5546 // Make sure we're only folding the explicit COPY defs/uses.
5547 (Ops[0] == 0 || Ops[0] == 1)) {
5548 bool IsSpill = Ops[0] == 0;
5549 bool IsFill = !IsSpill;
5550 const TargetRegisterInfo &TRI = getRegisterInfo();
5551 const MachineRegisterInfo &MRI = MF.getRegInfo();
5552 MachineBasicBlock &MBB = *MI.getParent();
5553 const MachineOperand &DstMO = MI.getOperand(0);
5554 const MachineOperand &SrcMO = MI.getOperand(1);
5555 Register DstReg = DstMO.getReg();
5556 Register SrcReg = SrcMO.getReg();
5557 // This is slightly expensive to compute for physical regs since
5558 // getMinimalPhysRegClass is slow.
5559 auto getRegClass = [&](unsigned Reg) {
5560 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
5561 : TRI.getMinimalPhysRegClass(Reg);
5562 };
5563
5564 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
5565 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
5566 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
5567 "Mismatched register size in non subreg COPY");
5568 if (IsSpill)
5569 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
5570 getRegClass(SrcReg), &TRI, Register());
5571 else
5572 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
5573 getRegClass(DstReg), &TRI, Register());
5574 return &*--InsertPt;
5575 }
5576
5577 // Handle cases like spilling def of:
5578 //
5579 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
5580 //
5581 // where the physical register source can be widened and stored to the full
5582 // virtual reg destination stack slot, in this case producing:
5583 //
5584 // STRXui %xzr, %stack.0
5585 //
5586 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
5587 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
5588 assert(SrcMO.getSubReg() == 0 &&
5589 "Unexpected subreg on physical register");
5590 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
5591 FrameIndex, &AArch64::GPR64RegClass, &TRI,
5592 Register());
5593 return &*--InsertPt;
5594 }
5595
5596 // Handle cases like filling use of:
5597 //
5598 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
5599 //
5600 // where we can load the full virtual reg source stack slot into the subreg
5601 // destination, in this case producing:
5602 //
5603 // LDRWui %0:sub_32<def,read-undef>, %stack.0
5604 //
5605 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
5606 const TargetRegisterClass *FillRC;
5607 switch (DstMO.getSubReg()) {
5608 default:
5609 FillRC = nullptr;
5610 break;
5611 case AArch64::sub_32:
5612 FillRC = &AArch64::GPR32RegClass;
5613 break;
5614 case AArch64::ssub:
5615 FillRC = &AArch64::FPR32RegClass;
5616 break;
5617 case AArch64::dsub:
5618 FillRC = &AArch64::FPR64RegClass;
5619 break;
5620 }
5621
5622 if (FillRC) {
5623 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
5624 TRI.getRegSizeInBits(*FillRC) &&
5625 "Mismatched regclass size on folded subreg COPY");
5626 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
5627 Register());
5628 MachineInstr &LoadMI = *--InsertPt;
5629 MachineOperand &LoadDst = LoadMI.getOperand(0);
5630 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
5631 LoadDst.setSubReg(DstMO.getSubReg());
5632 LoadDst.setIsUndef();
5633 return &LoadMI;
5634 }
5635 }
5636 }
5637
5638 // Cannot fold.
5639 return nullptr;
5640}
5641
5642int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
5643 StackOffset &SOffset,
5644 bool *OutUseUnscaledOp,
5645 unsigned *OutUnscaledOp,
5646 int64_t *EmittableOffset) {
5647 // Set output values in case of early exit.
5648 if (EmittableOffset)
5649 *EmittableOffset = 0;
5650 if (OutUseUnscaledOp)
5651 *OutUseUnscaledOp = false;
5652 if (OutUnscaledOp)
5653 *OutUnscaledOp = 0;
5654
5655 // Exit early for structured vector spills/fills as they can't take an
5656 // immediate offset.
5657 switch (MI.getOpcode()) {
5658 default:
5659 break;
5660 case AArch64::LD1Rv1d:
5661 case AArch64::LD1Rv2s:
5662 case AArch64::LD1Rv2d:
5663 case AArch64::LD1Rv4h:
5664 case AArch64::LD1Rv4s:
5665 case AArch64::LD1Rv8b:
5666 case AArch64::LD1Rv8h:
5667 case AArch64::LD1Rv16b:
5668 case AArch64::LD1Twov2d:
5669 case AArch64::LD1Threev2d:
5670 case AArch64::LD1Fourv2d:
5671 case AArch64::LD1Twov1d:
5672 case AArch64::LD1Threev1d:
5673 case AArch64::LD1Fourv1d:
5674 case AArch64::ST1Twov2d:
5675 case AArch64::ST1Threev2d:
5676 case AArch64::ST1Fourv2d:
5677 case AArch64::ST1Twov1d:
5678 case AArch64::ST1Threev1d:
5679 case AArch64::ST1Fourv1d:
5680 case AArch64::ST1i8:
5681 case AArch64::ST1i16:
5682 case AArch64::ST1i32:
5683 case AArch64::ST1i64:
5684 case AArch64::IRG:
5685 case AArch64::IRGstack:
5686 case AArch64::STGloop:
5687 case AArch64::STZGloop:
5688 return AArch64FrameOffsetCannotUpdate;
5689 }
5690
5691 // Get the min/max offset and the scale.
5692 TypeSize ScaleValue(0U, false), Width(0U, false);
5693 int64_t MinOff, MaxOff;
5694 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
5695 MaxOff))
5696 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5697
5698 // Construct the complete offset.
5699 bool IsMulVL = ScaleValue.isScalable();
5700 unsigned Scale = ScaleValue.getKnownMinValue();
5701 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
5702
5703 const MachineOperand &ImmOpnd =
5704 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
5705 Offset += ImmOpnd.getImm() * Scale;
5706
5707 // If the offset doesn't match the scale, we rewrite the instruction to
5708 // use the unscaled instruction instead. Likewise, if we have a negative
5709 // offset and there is an unscaled op to use.
5710 std::optional<unsigned> UnscaledOp =
5711 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
5712 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
5713 if (useUnscaledOp &&
5714 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
5715 MaxOff))
5716 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5717
5718 Scale = ScaleValue.getKnownMinValue();
5719 assert(IsMulVL == ScaleValue.isScalable() &&
5720 "Unscaled opcode has different value for scalable");
5721
5722 int64_t Remainder = Offset % Scale;
5723 assert(!(Remainder && useUnscaledOp) &&
5724 "Cannot have remainder when using unscaled op");
5725
5726 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
5727 int64_t NewOffset = Offset / Scale;
5728 if (MinOff <= NewOffset && NewOffset <= MaxOff)
5729 Offset = Remainder;
5730 else {
5731 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
5732 Offset = Offset - NewOffset * Scale;
5733 }
5734
5735 if (EmittableOffset)
5736 *EmittableOffset = NewOffset;
5737 if (OutUseUnscaledOp)
5738 *OutUseUnscaledOp = useUnscaledOp;
5739 if (OutUnscaledOp && UnscaledOp)
5740 *OutUnscaledOp = *UnscaledOp;
5741
5742 if (IsMulVL)
5743 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
5744 else
5745 SOffset = StackOffset::get(Offset, SOffset.getScalable());
5746 return AArch64FrameOffsetCanUpdate |
5747 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
5748}
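// Illustrative examples (values assumed): for LDRXui (scale 8, unsigned 12-bit
// immediate) an incoming fixed offset of 12 is not a multiple of 8, so the
// helper switches to the unscaled LDURXi form and reports an emittable offset
// of 12 with nothing left over. An offset of 40000 stays scaled but is clamped
// to the maximum encodable 4095 * 8 = 32760; the remaining 7240 bytes are
// handed back in SOffset for the caller to materialize separately.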
5749
5750bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
5751 unsigned FrameReg, StackOffset &Offset,
5752 const AArch64InstrInfo *TII) {
5753 unsigned Opcode = MI.getOpcode();
5754 unsigned ImmIdx = FrameRegIdx + 1;
5755
5756 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
5757 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
5758 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
5759 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
5760 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
5761 MI.eraseFromParent();
5762 Offset = StackOffset();
5763 return true;
5764 }
5765
5766 int64_t NewOffset;
5767 unsigned UnscaledOp;
5768 bool UseUnscaledOp;
5769 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
5770 &UnscaledOp, &NewOffset);
5771 if (Status & AArch64FrameOffsetCanUpdate) {
5772 if (Status & AArch64FrameOffsetIsLegal)
5773 // Replace the FrameIndex with FrameReg.
5774 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
5775 if (UseUnscaledOp)
5776 MI.setDesc(TII->get(UnscaledOp));
5777
5778 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
5779 return !Offset;
5780 }
5781
5782 return false;
5783}
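// Illustrative example (operands assumed): for
//   %x0 = LDRXui %stack.0, 0
// where %stack.0 resolves to [sp, #16], the offset is legal as the scaled
// immediate 2 (16 / 8), so the instruction is rewritten in place to
//   %x0 = LDRXui $sp, 2
// and the function returns true because no residual offset remains.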
5784
5785void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
5786 MachineBasicBlock::iterator MI) const {
5787 DebugLoc DL;
5788 BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
5789}
5790
5791MCInst AArch64InstrInfo::getNop() const {
5792 return MCInstBuilder(AArch64::HINT).addImm(0);
5793}
5794
5795// AArch64 supports MachineCombiner.
5796bool AArch64InstrInfo::useMachineCombiner() const { return true; }
5797
5798// True when Opc sets flag
5799static bool isCombineInstrSettingFlag(unsigned Opc) {
5800 switch (Opc) {
5801 case AArch64::ADDSWrr:
5802 case AArch64::ADDSWri:
5803 case AArch64::ADDSXrr:
5804 case AArch64::ADDSXri:
5805 case AArch64::SUBSWrr:
5806 case AArch64::SUBSXrr:
5807 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5808 case AArch64::SUBSWri:
5809 case AArch64::SUBSXri:
5810 return true;
5811 default:
5812 break;
5813 }
5814 return false;
5815}
5816
5817// 32b Opcodes that can be combined with a MUL
5818static bool isCombineInstrCandidate32(unsigned Opc) {
5819 switch (Opc) {
5820 case AArch64::ADDWrr:
5821 case AArch64::ADDWri:
5822 case AArch64::SUBWrr:
5823 case AArch64::ADDSWrr:
5824 case AArch64::ADDSWri:
5825 case AArch64::SUBSWrr:
5826 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5827 case AArch64::SUBWri:
5828 case AArch64::SUBSWri:
5829 return true;
5830 default:
5831 break;
5832 }
5833 return false;
5834}
5835
5836// 64b Opcodes that can be combined with a MUL
5837static bool isCombineInstrCandidate64(unsigned Opc) {
5838 switch (Opc) {
5839 case AArch64::ADDXrr:
5840 case AArch64::ADDXri:
5841 case AArch64::SUBXrr:
5842 case AArch64::ADDSXrr:
5843 case AArch64::ADDSXri:
5844 case AArch64::SUBSXrr:
5845 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5846 case AArch64::SUBXri:
5847 case AArch64::SUBSXri:
5848 case AArch64::ADDv8i8:
5849 case AArch64::ADDv16i8:
5850 case AArch64::ADDv4i16:
5851 case AArch64::ADDv8i16:
5852 case AArch64::ADDv2i32:
5853 case AArch64::ADDv4i32:
5854 case AArch64::SUBv8i8:
5855 case AArch64::SUBv16i8:
5856 case AArch64::SUBv4i16:
5857 case AArch64::SUBv8i16:
5858 case AArch64::SUBv2i32:
5859 case AArch64::SUBv4i32:
5860 return true;
5861 default:
5862 break;
5863 }
5864 return false;
5865}
5866
5867// FP Opcodes that can be combined with a FMUL.
5868static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
5869 switch (Inst.getOpcode()) {
5870 default:
5871 break;
5872 case AArch64::FADDHrr:
5873 case AArch64::FADDSrr:
5874 case AArch64::FADDDrr:
5875 case AArch64::FADDv4f16:
5876 case AArch64::FADDv8f16:
5877 case AArch64::FADDv2f32:
5878 case AArch64::FADDv2f64:
5879 case AArch64::FADDv4f32:
5880 case AArch64::FSUBHrr:
5881 case AArch64::FSUBSrr:
5882 case AArch64::FSUBDrr:
5883 case AArch64::FSUBv4f16:
5884 case AArch64::FSUBv8f16:
5885 case AArch64::FSUBv2f32:
5886 case AArch64::FSUBv2f64:
5887 case AArch64::FSUBv4f32:
5888 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
5889 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
5890 // the target options or if FADD/FSUB has the contract fast-math flag.
5891 return Options.UnsafeFPMath ||
5892 Options.AllowFPOpFusion == FPOpFusion::Fast ||
5893 Inst.getFlag(MachineInstr::FmContract);
5894 return true;
5895 }
5896 return false;
5897}
5898
5899// Opcodes that can be combined with a MUL
5900static bool isCombineInstrCandidate(unsigned Opc) {
5901 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
5902}
5903
5904//
5905// Utility routine that checks if \param MO is defined by an
5906// \param CombineOpc instruction in the basic block \param MBB
5907static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
5908 unsigned CombineOpc, unsigned ZeroReg = 0,
5909 bool CheckZeroReg = false) {
5910 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5911 MachineInstr *MI = nullptr;
5912
5913 if (MO.isReg() && MO.getReg().isVirtual())
5914 MI = MRI.getUniqueVRegDef(MO.getReg());
5915 // And it needs to be in the trace (otherwise, it won't have a depth).
5916 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
5917 return false;
5918 // Must only be used by the user we combine with.
5919 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
5920 return false;
5921
5922 if (CheckZeroReg) {
5923 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
5924 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
5925 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
5926 // The third input reg must be zero.
5927 if (MI->getOperand(3).getReg() != ZeroReg)
5928 return false;
5929 }
5930
5931 if (isCombineInstrSettingFlag(CombineOpc) &&
5932 MI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
5933 return false;
5934
5935 return true;
5936}
5937
5938//
5939// Is \param MO defined by an integer multiply and can be combined?
5940static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5941 unsigned MulOpc, unsigned ZeroReg) {
5942 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
5943}
5944
5945//
5946// Is \param MO defined by a floating-point multiply and can be combined?
5947static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5948 unsigned MulOpc) {
5949 return canCombine(MBB, MO, MulOpc);
5950}
5951
5952// TODO: There are many more machine instruction opcodes to match:
5953// 1. Other data types (integer, vectors)
5954// 2. Other math / logic operations (xor, or)
5955// 3. Other forms of the same operation (intrinsics and other variants)
5956bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
5957 bool Invert) const {
5958 if (Invert)
5959 return false;
5960 switch (Inst.getOpcode()) {
5961 // == Floating-point types ==
5962 // -- Floating-point instructions --
5963 case AArch64::FADDHrr:
5964 case AArch64::FADDSrr:
5965 case AArch64::FADDDrr:
5966 case AArch64::FMULHrr:
5967 case AArch64::FMULSrr:
5968 case AArch64::FMULDrr:
5969 case AArch64::FMULX16:
5970 case AArch64::FMULX32:
5971 case AArch64::FMULX64:
5972 // -- Advanced SIMD instructions --
5973 case AArch64::FADDv4f16:
5974 case AArch64::FADDv8f16:
5975 case AArch64::FADDv2f32:
5976 case AArch64::FADDv4f32:
5977 case AArch64::FADDv2f64:
5978 case AArch64::FMULv4f16:
5979 case AArch64::FMULv8f16:
5980 case AArch64::FMULv2f32:
5981 case AArch64::FMULv4f32:
5982 case AArch64::FMULv2f64:
5983 case AArch64::FMULXv4f16:
5984 case AArch64::FMULXv8f16:
5985 case AArch64::FMULXv2f32:
5986 case AArch64::FMULXv4f32:
5987 case AArch64::FMULXv2f64:
5988 // -- SVE instructions --
5989 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
5990 // in the SVE instruction set (though there are predicated ones).
5991 case AArch64::FADD_ZZZ_H:
5992 case AArch64::FADD_ZZZ_S:
5993 case AArch64::FADD_ZZZ_D:
5994 case AArch64::FMUL_ZZZ_H:
5995 case AArch64::FMUL_ZZZ_S:
5996 case AArch64::FMUL_ZZZ_D:
5997 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
5998 (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
5999 Inst.getFlag(MachineInstr::MIFlag::FmNsz));
6000
6001 // == Integer types ==
6002 // -- Base instructions --
6003 // Opcodes MULWrr and MULXrr don't exist because
6004 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6005 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6006 // The machine-combiner does not support three-source-operand machine
6007 // instructions, so we cannot reassociate MULs.
6008 case AArch64::ADDWrr:
6009 case AArch64::ADDXrr:
6010 case AArch64::ANDWrr:
6011 case AArch64::ANDXrr:
6012 case AArch64::ORRWrr:
6013 case AArch64::ORRXrr:
6014 case AArch64::EORWrr:
6015 case AArch64::EORXrr:
6016 case AArch64::EONWrr:
6017 case AArch64::EONXrr:
6018 // -- Advanced SIMD instructions --
6019 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6020 // in the Advanced SIMD instruction set.
6021 case AArch64::ADDv8i8:
6022 case AArch64::ADDv16i8:
6023 case AArch64::ADDv4i16:
6024 case AArch64::ADDv8i16:
6025 case AArch64::ADDv2i32:
6026 case AArch64::ADDv4i32:
6027 case AArch64::ADDv1i64:
6028 case AArch64::ADDv2i64:
6029 case AArch64::MULv8i8:
6030 case AArch64::MULv16i8:
6031 case AArch64::MULv4i16:
6032 case AArch64::MULv8i16:
6033 case AArch64::MULv2i32:
6034 case AArch64::MULv4i32:
6035 case AArch64::ANDv8i8:
6036 case AArch64::ANDv16i8:
6037 case AArch64::ORRv8i8:
6038 case AArch64::ORRv16i8:
6039 case AArch64::EORv8i8:
6040 case AArch64::EORv16i8:
6041 // -- SVE instructions --
6042 case AArch64::ADD_ZZZ_B:
6043 case AArch64::ADD_ZZZ_H:
6044 case AArch64::ADD_ZZZ_S:
6045 case AArch64::ADD_ZZZ_D:
6046 case AArch64::MUL_ZZZ_B:
6047 case AArch64::MUL_ZZZ_H:
6048 case AArch64::MUL_ZZZ_S:
6049 case AArch64::MUL_ZZZ_D:
6050 case AArch64::AND_ZZZ:
6051 case AArch64::ORR_ZZZ:
6052 case AArch64::EOR_ZZZ:
6053 return true;
6054
6055 default:
6056 return false;
6057 }
6058}
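// For the opcodes accepted above, the machine combiner may reassociate
// expressions, e.g. rewrite ((a + b) + c) + d as (a + b) + (c + d), trading an
// extra live value for a shorter dependency chain; the FP cases additionally
// require unsafe-fp-math or the reassoc and nsz fast-math flags checked above.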
6059
6060/// Find instructions that can be turned into madd.
6061static bool getMaddPatterns(MachineInstr &Root,
6062 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
6063 unsigned Opc = Root.getOpcode();
6064 MachineBasicBlock &MBB = *Root.getParent();
6065 bool Found = false;
6066
6067 if (!isCombineInstrCandidate(Opc))
6068 return false;
6069 if (isCombineInstrSettingFlag(Opc)) {
6070 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
6071 // When NZCV is live bail out.
6072 if (Cmp_NZCV == -1)
6073 return false;
6074 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6075 // When opcode can't change bail out.
6076 // CHECKME: do we miss any cases for opcode conversion?
6077 if (NewOpc == Opc)
6078 return false;
6079 Opc = NewOpc;
6080 }
6081
6082 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6083 MachineCombinerPattern Pattern) {
6084 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6085 Patterns.push_back(Pattern);
6086 Found = true;
6087 }
6088 };
6089
6090 auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
6091 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6092 Patterns.push_back(Pattern);
6093 Found = true;
6094 }
6095 };
6096
6097 typedef MachineCombinerPattern MCP;
6098
6099 switch (Opc) {
6100 default:
6101 break;
6102 case AArch64::ADDWrr:
6103 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6104 "ADDWrr does not have register operands");
6105 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6106 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6107 break;
6108 case AArch64::ADDXrr:
6109 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6110 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6111 break;
6112 case AArch64::SUBWrr:
6113 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6114 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6115 break;
6116 case AArch64::SUBXrr:
6117 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6118 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6119 break;
6120 case AArch64::ADDWri:
6121 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6122 break;
6123 case AArch64::ADDXri:
6124 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6125 break;
6126 case AArch64::SUBWri:
6127 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6128 break;
6129 case AArch64::SUBXri:
6130 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6131 break;
6132 case AArch64::ADDv8i8:
6133 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6134 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6135 break;
6136 case AArch64::ADDv16i8:
6137 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6138 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6139 break;
6140 case AArch64::ADDv4i16:
6141 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6142 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6143 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6144 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6145 break;
6146 case AArch64::ADDv8i16:
6147 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6148 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6149 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6150 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6151 break;
6152 case AArch64::ADDv2i32:
6153 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6154 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6155 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6156 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6157 break;
6158 case AArch64::ADDv4i32:
6159 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6160 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6161 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6162 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6163 break;
6164 case AArch64::SUBv8i8:
6165 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6166 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6167 break;
6168 case AArch64::SUBv16i8:
6169 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6170 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6171 break;
6172 case AArch64::SUBv4i16:
6173 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6174 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6175 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6176 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6177 break;
6178 case AArch64::SUBv8i16:
6179 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6180 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6181 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6182 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6183 break;
6184 case AArch64::SUBv2i32:
6185 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6186 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6187 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6188 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6189 break;
6190 case AArch64::SUBv4i32:
6191 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6192 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6193 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6194 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
6195 break;
6196 }
6197 return Found;
6198}
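// Illustrative example (virtual registers assumed): for
//   %3:gpr32 = MADDWrrr %1, %2, $wzr    (the expansion of a 32-bit MUL)
//   %4:gpr32 = ADDWrr %0, %3
// the code above records MULADDW_OP2, and genAlternativeCodeSequence() later
// replaces the pair with
//   %4:gpr32 = MADDWrrr %1, %2, %0
// provided the multiply has no other non-debug users.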
6199/// Floating-Point Support
6200
6201/// Find instructions that can be turned into madd.
6202static bool getFMAPatterns(MachineInstr &Root,
6203 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
6204
6205 if (!isCombineInstrCandidateFP(Root))
6206 return false;
6207
6208 MachineBasicBlock &MBB = *Root.getParent();
6209 bool Found = false;
6210
6211 auto Match = [&](int Opcode, int Operand,
6212 MachineCombinerPattern Pattern) -> bool {
6213 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
6214 Patterns.push_back(Pattern);
6215 return true;
6216 }
6217 return false;
6218 };
6219
6220 typedef MachineCombinerPattern MCP;
6221
6222 switch (Root.getOpcode()) {
6223 default:
6224 assert(false && "Unsupported FP instruction in combiner\n");
6225 break;
6226 case AArch64::FADDHrr:
6227 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6228 "FADDHrr does not have register operands");
6229
6230 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
6231 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
6232 break;
6233 case AArch64::FADDSrr:
6234 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6235 "FADDSrr does not have register operands");
6236
6237 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
6238 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
6239
6240 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
6241 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
6242 break;
6243 case AArch64::FADDDrr:
6244 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
6245 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
6246
6247 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
6248 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
6249 break;
6250 case AArch64::FADDv4f16:
6251 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
6252 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
6253
6254 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
6255 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
6256 break;
6257 case AArch64::FADDv8f16:
6258 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
6259 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
6260
6261 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
6262 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
6263 break;
6264 case AArch64::FADDv2f32:
6265 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
6266 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
6267
6268 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
6269 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
6270 break;
6271 case AArch64::FADDv2f64:
6272 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
6273 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
6274
6275 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
6276 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
6277 break;
6278 case AArch64::FADDv4f32:
6279 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
6280 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
6281
6282 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
6283 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
6284 break;
6285 case AArch64::FSUBHrr:
6286 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
6287 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
6288 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
6289 break;
6290 case AArch64::FSUBSrr:
6291 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
6292
6293 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
6294 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
6295
6296 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
6297 break;
6298 case AArch64::FSUBDrr:
6299 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
6300
6301 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
6302 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
6303
6304 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
6305 break;
6306 case AArch64::FSUBv4f16:
6307 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
6308 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
6309
6310 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
6311 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
6312 break;
6313 case AArch64::FSUBv8f16:
6314 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
6315 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
6316
6317 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
6318 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
6319 break;
6320 case AArch64::FSUBv2f32:
6321 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
6322 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
6323
6324 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
6325 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
6326 break;
6327 case AArch64::FSUBv2f64:
6328 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
6329 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
6330
6331 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
6332 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
6333 break;
6334 case AArch64::FSUBv4f32:
6335 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
6336 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
6337
6338 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
6339 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
6340 break;
6341 }
6342 return Found;
6343}
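// Illustrative example (registers assumed): given the required fp-contract
// setting or fast-math flags on the FADD,
//   %2:fpr32 = FMULSrr %0, %1
//   %3:fpr32 = FADDSrr %4, %2
// matches FMULADDS_OP2 and is later rewritten into a single
//   %3:fpr32 = FMADDSrrr %0, %1, %4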
6344
6345static bool getFMULPatterns(MachineInstr &Root,
6346 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
6347 MachineBasicBlock &MBB = *Root.getParent();
6348 bool Found = false;
6349
6350 auto Match = [&](unsigned Opcode, int Operand,
6351 MachineCombinerPattern Pattern) -> bool {
6352 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6353 MachineOperand &MO = Root.getOperand(Operand);
6354 MachineInstr *MI = nullptr;
6355 if (MO.isReg() && MO.getReg().isVirtual())
6356 MI = MRI.getUniqueVRegDef(MO.getReg());
6357 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
6358 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
6359 MI->getOperand(1).getReg().isVirtual())
6360 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
6361 if (MI && MI->getOpcode() == Opcode) {
6362 Patterns.push_back(Pattern);
6363 return true;
6364 }
6365 return false;
6366 };
6367
6368 typedef MachineCombinerPattern MCP;
6369
6370 switch (Root.getOpcode()) {
6371 default:
6372 return false;
6373 case AArch64::FMULv2f32:
6374 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
6375 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
6376 break;
6377 case AArch64::FMULv2f64:
6378 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
6379 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
6380 break;
6381 case AArch64::FMULv4f16:
6382 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
6383 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
6384 break;
6385 case AArch64::FMULv4f32:
6386 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
6387 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
6388 break;
6389 case AArch64::FMULv8f16:
6390 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
6391 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
6392 break;
6393 }
6394
6395 return Found;
6396}
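// Illustrative example (operands assumed): for
//   %2:fpr128 = DUPv4i32lane %1, 3
//   %3:fpr128 = FMULv4f32 %0, %2
// the pattern FMULv4i32_indexed_OP2 is recorded, and genIndexedMultiply()
// later folds the duplicate away:
//   %3:fpr128 = FMULv4i32_indexed %0, %1, 3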
6397
6398static bool getFNEGPatterns(MachineInstr &Root,
6399 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
6400 unsigned Opc = Root.getOpcode();
6401 MachineBasicBlock &MBB = *Root.getParent();
6402 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6403
6404 auto Match = [&](unsigned Opcode, MachineCombinerPattern Pattern) -> bool {
6405 MachineOperand &MO = Root.getOperand(1);
6406 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
6407 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
6408 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
6409 Root.getFlag(MachineInstr::MIFlag::FmContract) &&
6410 Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
6411 MI->getFlag(MachineInstr::MIFlag::FmContract) &&
6412 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
6413 Patterns.push_back(Pattern);
6414 return true;
6415 }
6416 return false;
6417 };
6418
6419 switch (Opc) {
6420 default:
6421 break;
6422 case AArch64::FNEGDr:
6423 return Match(AArch64::FMADDDrrr, MachineCombinerPattern::FNMADD);
6424 case AArch64::FNEGSr:
6425 return Match(AArch64::FMADDSrrr, MachineCombinerPattern::FNMADD);
6426 }
6427
6428 return false;
6429}
6430
6431/// Return true when a code sequence can improve throughput. It
6432/// should be called only for instructions in loops.
6433/// \param Pattern - combiner pattern
6434bool AArch64InstrInfo::isThroughputPattern(
6435 MachineCombinerPattern Pattern) const {
6436 switch (Pattern) {
6437 default:
6438 break;
6544 return true;
6545 } // end switch (Pattern)
6546 return false;
6547}
6548
6549/// Find other MI combine patterns.
6550static bool getMiscPatterns(MachineInstr &Root,
6551 SmallVectorImpl<MachineCombinerPattern> &Patterns)
6552{
6553 // A - (B + C) ==> (A - B) - C or (A - C) - B
6554 unsigned Opc = Root.getOpcode();
6555 MachineBasicBlock &MBB = *Root.getParent();
6556
6557 switch (Opc) {
6558 case AArch64::SUBWrr:
6559 case AArch64::SUBSWrr:
6560 case AArch64::SUBXrr:
6561 case AArch64::SUBSXrr:
6562 // Found candidate root.
6563 break;
6564 default:
6565 return false;
6566 }
6567
6568 if (isCombineInstrSettingFlag(Opc) &&
6569 Root.findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
6570 return false;
6571
6572 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
6573 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
6574 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
6575 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
6576 Patterns.push_back(MachineCombinerPattern::SUBADD_OP1);
6577 Patterns.push_back(MachineCombinerPattern::SUBADD_OP2);
6578 return true;
6579 }
6580
6581 return false;
6582}
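// Rewriting A - (B + C) as (A - B) - C (or as (A - C) - B) pays off when B or
// C is the late-arriving value: the early subtraction can execute as soon as
// its inputs are ready, leaving only one instruction on the late operand's
// critical path instead of two.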
6583
6584/// Return true when there is potentially a faster code sequence for an
6585/// instruction chain ending in \p Root. All potential patterns are listed in
6586/// the \p Pattern vector. Pattern should be sorted in priority order since the
6587/// pattern evaluator stops checking as soon as it finds a faster sequence.
6588
6589bool AArch64InstrInfo::getMachineCombinerPatterns(
6590 MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
6591 bool DoRegPressureReduce) const {
6592 // Integer patterns
6593 if (getMaddPatterns(Root, Patterns))
6594 return true;
6595 // Floating point patterns
6596 if (getFMULPatterns(Root, Patterns))
6597 return true;
6598 if (getFMAPatterns(Root, Patterns))
6599 return true;
6600 if (getFNEGPatterns(Root, Patterns))
6601 return true;
6602
6603 // Other patterns
6604 if (getMiscPatterns(Root, Patterns))
6605 return true;
6606
6607 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
6608 DoRegPressureReduce);
6609}
6610
6611enum class FMAInstKind { Default, Indexed, Accumulator };
6612/// genFusedMultiply - Generate fused multiply instructions.
6613/// This function supports both integer and floating point instructions.
6614/// A typical example:
6615/// F|MUL I=A,B,0
6616/// F|ADD R,I,C
6617/// ==> F|MADD R,A,B,C
6618/// \param MF Containing MachineFunction
6619/// \param MRI Register information
6620/// \param TII Target information
6621/// \param Root is the F|ADD instruction
6622/// \param [out] InsInstrs is a vector of machine instructions and will
6623/// contain the generated madd instruction
6624/// \param IdxMulOpd is index of operand in Root that is the result of
6625/// the F|MUL. In the example above IdxMulOpd is 1.
6626/// \param MaddOpc the opcode of the f|madd instruction
6627/// \param RC Register class of operands
6628/// \param kind of fma instruction (addressing mode) to be generated
6629/// \param ReplacedAddend is the result register from the instruction
6630/// replacing the non-combined operand, if any.
6631static MachineInstr *
6632genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
6633 const TargetInstrInfo *TII, MachineInstr &Root,
6634 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
6635 unsigned MaddOpc, const TargetRegisterClass *RC,
6636 FMAInstKind kind = FMAInstKind::Default,
6637 const Register *ReplacedAddend = nullptr) {
6638 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6639
6640 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
6641 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6642 Register ResultReg = Root.getOperand(0).getReg();
6643 Register SrcReg0 = MUL->getOperand(1).getReg();
6644 bool Src0IsKill = MUL->getOperand(1).isKill();
6645 Register SrcReg1 = MUL->getOperand(2).getReg();
6646 bool Src1IsKill = MUL->getOperand(2).isKill();
6647
6648 Register SrcReg2;
6649 bool Src2IsKill;
6650 if (ReplacedAddend) {
6651 // If we just generated a new addend, this must be its only use.
6652 SrcReg2 = *ReplacedAddend;
6653 Src2IsKill = true;
6654 } else {
6655 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
6656 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
6657 }
6658
6659 if (ResultReg.isVirtual())
6660 MRI.constrainRegClass(ResultReg, RC);
6661 if (SrcReg0.isVirtual())
6662 MRI.constrainRegClass(SrcReg0, RC);
6663 if (SrcReg1.isVirtual())
6664 MRI.constrainRegClass(SrcReg1, RC);
6665 if (SrcReg2.isVirtual())
6666 MRI.constrainRegClass(SrcReg2, RC);
6667
6668 MachineInstrBuilder MIB;
6669 if (kind == FMAInstKind::Default)
6670 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6671 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6672 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6673 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6674 else if (kind == FMAInstKind::Indexed)
6675 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6676 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6677 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6678 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6679 .addImm(MUL->getOperand(3).getImm());
6680 else if (kind == FMAInstKind::Accumulator)
6681 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6682 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6683 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6684 .addReg(SrcReg1, getKillRegState(Src1IsKill));
6685 else
6686 assert(false && "Invalid FMA instruction kind \n");
6687 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
6688 InsInstrs.push_back(MIB);
6689 return MUL;
6690}
6691
6692static MachineInstr *
6694 const TargetInstrInfo *TII, MachineInstr &Root,
6696 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
6697
6698 unsigned Opc = 0;
6699 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
6700 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6701 Opc = AArch64::FNMADDSrrr;
6702 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
6703 Opc = AArch64::FNMADDDrrr;
6704 else
6705 return nullptr;
6706
6707 Register ResultReg = Root.getOperand(0).getReg();
6708 Register SrcReg0 = MAD->getOperand(1).getReg();
6709 Register SrcReg1 = MAD->getOperand(2).getReg();
6710 Register SrcReg2 = MAD->getOperand(3).getReg();
6711 bool Src0IsKill = MAD->getOperand(1).isKill();
6712 bool Src1IsKill = MAD->getOperand(2).isKill();
6713 bool Src2IsKill = MAD->getOperand(3).isKill();
6714 if (ResultReg.isVirtual())
6715 MRI.constrainRegClass(ResultReg, RC);
6716 if (SrcReg0.isVirtual())
6717 MRI.constrainRegClass(SrcReg0, RC);
6718 if (SrcReg1.isVirtual())
6719 MRI.constrainRegClass(SrcReg1, RC);
6720 if (SrcReg2.isVirtual())
6721 MRI.constrainRegClass(SrcReg2, RC);
6722
6724 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
6725 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6726 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6727 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6728 InsInstrs.push_back(MIB);
6729
6730 return MAD;
6731}
6732
6733/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
6734static MachineInstr *
6735genIndexedMultiply(MachineInstr &Root,
6736 SmallVectorImpl<MachineInstr *> &InsInstrs,
6737 unsigned IdxDupOp, unsigned MulOpc,
6738 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
6739 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
6740 "Invalid index of FMUL operand");
6741
6742 MachineFunction &MF = *Root.getMF();
6743 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6744
6745 MachineInstr *Dup =
6746 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
6747
6748 if (Dup->getOpcode() == TargetOpcode::COPY)
6749 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
6750
6751 Register DupSrcReg = Dup->getOperand(1).getReg();
6752 MRI.clearKillFlags(DupSrcReg);
6753 MRI.constrainRegClass(DupSrcReg, RC);
6754
6755 unsigned DupSrcLane = Dup->getOperand(2).getImm();
6756
6757 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
6758 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
6759
6760 Register ResultReg = Root.getOperand(0).getReg();
6761
6762 MachineInstrBuilder MIB;
6763 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
6764 .add(MulOp)
6765 .addReg(DupSrcReg)
6766 .addImm(DupSrcLane);
6767
6768 InsInstrs.push_back(MIB);
6769 return &Root;
6770}
6771
6772/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
6773/// instructions.
6774///
6775/// \see genFusedMultiply
6776static MachineInstr *genFusedMultiplyAcc(
6777 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6778 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6779 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6780 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6781 FMAInstKind::Accumulator);
6782}
6783
6784/// genNeg - Helper to generate an intermediate negation of the second operand
6785/// of Root
6786static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
6787 const TargetInstrInfo *TII, MachineInstr &Root,
6788 SmallVectorImpl<MachineInstr *> &InsInstrs,
6789 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
6790 unsigned MnegOpc, const TargetRegisterClass *RC) {
6791 Register NewVR = MRI.createVirtualRegister(RC);
6792 MachineInstrBuilder MIB =
6793 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
6794 .add(Root.getOperand(2));
6795 InsInstrs.push_back(MIB);
6796
6797 assert(InstrIdxForVirtReg.empty());
6798 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6799
6800 return NewVR;
6801}
6802
6803/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
6804/// instructions with an additional negation of the accumulator
6805static MachineInstr *genFusedMultiplyAccNeg(
6806 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6807 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6808 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6809 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6810 assert(IdxMulOpd == 1);
6811
6812 Register NewVR =
6813 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6814 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6815 FMAInstKind::Accumulator, &NewVR);
6816}
6817
6818/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
6819/// instructions.
6820///
6821/// \see genFusedMultiply
6822static MachineInstr *genFusedMultiplyIdx(
6823 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6824 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6825 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6826 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6827 FMAInstKind::Indexed);
6828}
6829
6830/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
6831/// instructions with an additional negation of the accumulator
6832static MachineInstr *genFusedMultiplyIdxNeg(
6833 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6834 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6835 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6836 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6837 assert(IdxMulOpd == 1);
6838
6839 Register NewVR =
6840 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6841
6842 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6843 FMAInstKind::Indexed, &NewVR);
6844}
6845
6846/// genMaddR - Generate madd instruction and combine mul and add using
6847/// an extra virtual register
6848/// Example - an ADD intermediate needs to be stored in a register:
6849/// MUL I=A,B,0
6850/// ADD R,I,Imm
6851/// ==> ORR V, ZR, Imm
6852/// ==> MADD R,A,B,V
6853/// \param MF Containing MachineFunction
6854/// \param MRI Register information
6855/// \param TII Target information
6856/// \param Root is the ADD instruction
6857/// \param [out] InsInstrs is a vector of machine instructions and will
6858/// contain the generated madd instruction
6859/// \param IdxMulOpd is index of operand in Root that is the result of
6860/// the MUL. In the example above IdxMulOpd is 1.
6861/// \param MaddOpc the opcode of the madd instruction
6862/// \param VR is a virtual register that holds the value of an ADD operand
6863/// (V in the example above).
6864/// \param RC Register class of operands
6865static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
6866 const TargetInstrInfo *TII, MachineInstr &Root,
6867 SmallVectorImpl<MachineInstr *> &InsInstrs,
6868 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
6869 const TargetRegisterClass *RC) {
6870 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6871
6872 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6873 Register ResultReg = Root.getOperand(0).getReg();
6874 Register SrcReg0 = MUL->getOperand(1).getReg();
6875 bool Src0IsKill = MUL->getOperand(1).isKill();
6876 Register SrcReg1 = MUL->getOperand(2).getReg();
6877 bool Src1IsKill = MUL->getOperand(2).isKill();
6878
6879 if (ResultReg.isVirtual())
6880 MRI.constrainRegClass(ResultReg, RC);
6881 if (SrcReg0.isVirtual())
6882 MRI.constrainRegClass(SrcReg0, RC);
6883 if (SrcReg1.isVirtual())
6884 MRI.constrainRegClass(SrcReg1, RC);
6886 MRI.constrainRegClass(VR, RC);
6887
6889 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6890 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6891 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6892 .addReg(VR);
6893 // Insert the MADD
6894 InsInstrs.push_back(MIB);
6895 return MUL;
6896}
6897
6898/// Do the following transformation
6899/// A - (B + C) ==> (A - B) - C
6900/// A - (B + C) ==> (A - C) - B
6901static void
6902genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
6903 const TargetInstrInfo *TII, MachineInstr &Root,
6904 SmallVectorImpl<MachineInstr *> &InsInstrs,
6905 SmallVectorImpl<MachineInstr *> &DelInstrs,
6906 unsigned IdxOpd1,
6907 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
6908 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
6909 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
6910 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
6911
6912 Register ResultReg = Root.getOperand(0).getReg();
6913 Register RegA = Root.getOperand(1).getReg();
6914 bool RegAIsKill = Root.getOperand(1).isKill();
6915 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
6916 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
6917 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
6918 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
6919 Register NewVR = MRI.createVirtualRegister(MRI.getRegClass(RegA));
6920
6921 unsigned Opcode = Root.getOpcode();
6922 if (Opcode == AArch64::SUBSWrr)
6923 Opcode = AArch64::SUBWrr;
6924 else if (Opcode == AArch64::SUBSXrr)
6925 Opcode = AArch64::SUBXrr;
6926 else
6927 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
6928 "Unexpected instruction opcode.");
6929
6930 MachineInstrBuilder MIB1 =
6931 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
6932 .addReg(RegA, getKillRegState(RegAIsKill))
6933 .addReg(RegB, getKillRegState(RegBIsKill));
6934 MachineInstrBuilder MIB2 =
6935 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
6936 .addReg(NewVR, getKillRegState(true))
6937 .addReg(RegC, getKillRegState(RegCIsKill));
6938
6939 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6940 InsInstrs.push_back(MIB1);
6941 InsInstrs.push_back(MIB2);
6942 DelInstrs.push_back(AddMI);
6943}
6944
6945/// When getMachineCombinerPatterns() finds potential patterns,
6946/// this function generates the instructions that could replace the
6947/// original code sequence
6948void AArch64InstrInfo::genAlternativeCodeSequence(
6949 MachineInstr &Root, MachineCombinerPattern Pattern,
6950 SmallVectorImpl<MachineInstr *> &InsInstrs,
6951 SmallVectorImpl<MachineInstr *> &DelInstrs,
6952 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
6953 MachineBasicBlock &MBB = *Root.getParent();
6954 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6955 MachineFunction &MF = *MBB.getParent();
6956 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6957
6958 MachineInstr *MUL = nullptr;
6959 const TargetRegisterClass *RC;
6960 unsigned Opc;
6961 switch (Pattern) {
6962 default:
6963 // Reassociate instructions.
6964 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
6965 DelInstrs, InstrIdxForVirtReg);
6966 return;
6967 case MachineCombinerPattern::SUBADD_OP1:
6968 // A - (B + C)
6969 // ==> (A - B) - C
6970 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
6971 InstrIdxForVirtReg);
6972 break;
6973 case MachineCombinerPattern::SUBADD_OP2:
6974 // A - (B + C)
6975 // ==> (A - C) - B
6976 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
6977 InstrIdxForVirtReg);
6978 break;
6979 case MachineCombinerPattern::MULADDW_OP1:
6980 case MachineCombinerPattern::MULADDX_OP1:
6981 // MUL I=A,B,0
6982 // ADD R,I,C
6983 // ==> MADD R,A,B,C
6984 // --- Create(MADD);
6985 if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
6986 Opc = AArch64::MADDWrrr;
6987 RC = &AArch64::GPR32RegClass;
6988 } else {
6989 Opc = AArch64::MADDXrrr;
6990 RC = &AArch64::GPR64RegClass;
6991 }
6992 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
6993 break;
6994 case MachineCombinerPattern::MULADDW_OP2:
6995 case MachineCombinerPattern::MULADDX_OP2:
6996 // MUL I=A,B,0
6997 // ADD R,C,I
6998 // ==> MADD R,A,B,C
6999 // --- Create(MADD);
7000 if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
7001 Opc = AArch64::MADDWrrr;
7002 RC = &AArch64::GPR32RegClass;
7003 } else {
7004 Opc = AArch64::MADDXrrr;
7005 RC = &AArch64::GPR64RegClass;
7006 }
7007 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7008 break;
7009 case MachineCombinerPattern::MULADDWI_OP1:
7010 case MachineCombinerPattern::MULADDXI_OP1: {
7011 // MUL I=A,B,0
7012 // ADD R,I,Imm
7013 // ==> MOV V, Imm
7014 // ==> MADD R,A,B,V
7015 // --- Create(MADD);
7016 const TargetRegisterClass *OrrRC;
7017 unsigned BitSize, OrrOpc, ZeroReg;
7018 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
7019 OrrOpc = AArch64::ORRWri;
7020 OrrRC = &AArch64::GPR32spRegClass;
7021 BitSize = 32;
7022 ZeroReg = AArch64::WZR;
7023 Opc = AArch64::MADDWrrr;
7024 RC = &AArch64::GPR32RegClass;
7025 } else {
7026 OrrOpc = AArch64::ORRXri;
7027 OrrRC = &AArch64::GPR64spRegClass;
7028 BitSize = 64;
7029 ZeroReg = AArch64::XZR;
7030 Opc = AArch64::MADDXrrr;
7031 RC = &AArch64::GPR64RegClass;
7032 }
7033 Register NewVR = MRI.createVirtualRegister(OrrRC);
7034 uint64_t Imm = Root.getOperand(2).getImm();
7035
7036 if (Root.getOperand(3).isImm()) {
7037 unsigned Val = Root.getOperand(3).getImm();
7038 Imm = Imm << Val;
7039 }
7040 uint64_t UImm = SignExtend64(Imm, BitSize);
7041 // The immediate can be composed via a single instruction.
7042 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
7043 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7044 if (Insn.size() != 1)
7045 return;
7046 auto MovI = Insn.begin();
7047 MachineInstrBuilder MIB1;
7048 // MOV is an alias for one of three instructions: movz, movn, and orr.
7049 if (MovI->Opcode == OrrOpc)
7050 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7051 .addReg(ZeroReg)
7052 .addImm(MovI->Op2);
7053 else {
7054 if (BitSize == 32)
7055 assert((MovI->Opcode == AArch64::MOVNWi ||
7056 MovI->Opcode == AArch64::MOVZWi) &&
7057 "Expected opcode");
7058 else
7059 assert((MovI->Opcode == AArch64::MOVNXi ||
7060 MovI->Opcode == AArch64::MOVZXi) &&
7061 "Expected opcode");
7062 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7063 .addImm(MovI->Op1)
7064 .addImm(MovI->Op2);
7065 }
7066 InsInstrs.push_back(MIB1);
7067 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7068 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7069 break;
7070 }
7071 case MachineCombinerPattern::MULSUBW_OP1:
7072 case MachineCombinerPattern::MULSUBX_OP1: {
7073 // MUL I=A,B,0
7074 // SUB R,I, C
7075 // ==> SUB V, 0, C
7076 // ==> MADD R,A,B,V // = -C + A*B
7077 // --- Create(MADD);
7078 const TargetRegisterClass *SubRC;
7079 unsigned SubOpc, ZeroReg;
7080 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
7081 SubOpc = AArch64::SUBWrr;
7082 SubRC = &AArch64::GPR32spRegClass;
7083 ZeroReg = AArch64::WZR;
7084 Opc = AArch64::MADDWrrr;
7085 RC = &AArch64::GPR32RegClass;
7086 } else {
7087 SubOpc = AArch64::SUBXrr;
7088 SubRC = &AArch64::GPR64spRegClass;
7089 ZeroReg = AArch64::XZR;
7090 Opc = AArch64::MADDXrrr;
7091 RC = &AArch64::GPR64RegClass;
7092 }
7093 Register NewVR = MRI.createVirtualRegister(SubRC);
7094 // SUB NewVR, 0, C
7095 MachineInstrBuilder MIB1 =
7096 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
7097 .addReg(ZeroReg)
7098 .add(Root.getOperand(2));
7099 InsInstrs.push_back(MIB1);
7100 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7101 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7102 break;
7103 }
7104 case MachineCombinerPattern::MULSUBW_OP2:
7105 case MachineCombinerPattern::MULSUBX_OP2:
7106 // MUL I=A,B,0
7107 // SUB R,C,I
7108 // ==> MSUB R,A,B,C (computes C - A*B)
7109 // --- Create(MSUB);
7110 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
7111 Opc = AArch64::MSUBWrrr;
7112 RC = &AArch64::GPR32RegClass;
7113 } else {
7114 Opc = AArch64::MSUBXrrr;
7115 RC = &AArch64::GPR64RegClass;
7116 }
7117 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7118 break;
7119 case MachineCombinerPattern::MULSUBWI_OP1:
7120 case MachineCombinerPattern::MULSUBXI_OP1: {
7121 // MUL I=A,B,0
7122 // SUB R,I, Imm
7123 // ==> MOV V, -Imm
7124 // ==> MADD R,A,B,V // = -Imm + A*B
7125 // --- Create(MADD);
7126 const TargetRegisterClass *OrrRC;
7127 unsigned BitSize, OrrOpc, ZeroReg;
7128 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
7129 OrrOpc = AArch64::ORRWri;
7130 OrrRC = &AArch64::GPR32spRegClass;
7131 BitSize = 32;
7132 ZeroReg = AArch64::WZR;
7133 Opc = AArch64::MADDWrrr;
7134 RC = &AArch64::GPR32RegClass;
7135 } else {
7136 OrrOpc = AArch64::ORRXri;
7137 OrrRC = &AArch64::GPR64spRegClass;
7138 BitSize = 64;
7139 ZeroReg = AArch64::XZR;
7140 Opc = AArch64::MADDXrrr;
7141 RC = &AArch64::GPR64RegClass;
7142 }
7143 Register NewVR = MRI.createVirtualRegister(OrrRC);
7144 uint64_t Imm = Root.getOperand(2).getImm();
7145 if (Root.getOperand(3).isImm()) {
7146 unsigned Val = Root.getOperand(3).getImm();
7147 Imm = Imm << Val;
7148 }
7149 uint64_t UImm = SignExtend64(-Imm, BitSize);
7150 // The immediate can be composed via a single instruction.
7151 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
7152 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7153 if (Insn.size() != 1)
7154 return;
7155 auto MovI = Insn.begin();
7156 MachineInstrBuilder MIB1;
7157 // MOV is an alias for one of three instructions: movz, movn, and orr.
7158 if (MovI->Opcode == OrrOpc)
7159 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7160 .addReg(ZeroReg)
7161 .addImm(MovI->Op2);
7162 else {
7163 if (BitSize == 32)
7164 assert((MovI->Opcode == AArch64::MOVNWi ||
7165 MovI->Opcode == AArch64::MOVZWi) &&
7166 "Expected opcode");
7167 else
7168 assert((MovI->Opcode == AArch64::MOVNXi ||
7169 MovI->Opcode == AArch64::MOVZXi) &&
7170 "Expected opcode");
7171 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7172 .addImm(MovI->Op1)
7173 .addImm(MovI->Op2);
7174 }
7175 InsInstrs.push_back(MIB1);
7176 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7177 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7178 break;
7179 }
7180
7182 Opc = AArch64::MLAv8i8;
7183 RC = &AArch64::FPR64RegClass;
7184 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7185 break;
7187 Opc = AArch64::MLAv8i8;
7188 RC = &AArch64::FPR64RegClass;
7189 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7190 break;
7192 Opc = AArch64::MLAv16i8;
7193 RC = &AArch64::FPR128RegClass;
7194 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7195 break;
7197 Opc = AArch64::MLAv16i8;
7198 RC = &AArch64::FPR128RegClass;
7199 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7200 break;
7202 Opc = AArch64::MLAv4i16;
7203 RC = &AArch64::FPR64RegClass;
7204 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7205 break;
7207 Opc = AArch64::MLAv4i16;
7208 RC = &AArch64::FPR64RegClass;
7209 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7210 break;
7212 Opc = AArch64::MLAv8i16;
7213 RC = &AArch64::FPR128RegClass;
7214 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7215 break;
7217 Opc = AArch64::MLAv8i16;
7218 RC = &AArch64::FPR128RegClass;
7219 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7220 break;
7222 Opc = AArch64::MLAv2i32;
7223 RC = &AArch64::FPR64RegClass;
7224 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7225 break;
7227 Opc = AArch64::MLAv2i32;
7228 RC = &AArch64::FPR64RegClass;
7229 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7230 break;
7232 Opc = AArch64::MLAv4i32;
7233 RC = &AArch64::FPR128RegClass;
7234 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7235 break;
7237 Opc = AArch64::MLAv4i32;
7238 RC = &AArch64::FPR128RegClass;
7239 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7240 break;
7241
7243 Opc = AArch64::MLAv8i8;
7244 RC = &AArch64::FPR64RegClass;
7245 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7246 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
7247 RC);
7248 break;
7250 Opc = AArch64::MLSv8i8;
7251 RC = &AArch64::FPR64RegClass;
7252 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7253 break;
7255 Opc = AArch64::MLAv16i8;
7256 RC = &AArch64::FPR128RegClass;
7257 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7258 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
7259 RC);
7260 break;
7262 Opc = AArch64::MLSv16i8;
7263 RC = &AArch64::FPR128RegClass;
7264 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7265 break;
7267 Opc = AArch64::MLAv4i16;
7268 RC = &AArch64::FPR64RegClass;
7269 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7270 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7271 RC);
7272 break;
7274 Opc = AArch64::MLSv4i16;
7275 RC = &AArch64::FPR64RegClass;
7276 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7277 break;
7279 Opc = AArch64::MLAv8i16;
7280 RC = &AArch64::FPR128RegClass;
7281 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7282 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7283 RC);
7284 break;
7286 Opc = AArch64::MLSv8i16;
7287 RC = &AArch64::FPR128RegClass;
7288 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7289 break;
7291 Opc = AArch64::MLAv2i32;
7292 RC = &AArch64::FPR64RegClass;
7293 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7294 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7295 RC);
7296 break;
7298 Opc = AArch64::MLSv2i32;
7299 RC = &AArch64::FPR64RegClass;
7300 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7301 break;
7303 Opc = AArch64::MLAv4i32;
7304 RC = &AArch64::FPR128RegClass;
7305 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7306 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7307 RC);
7308 break;
7310 Opc = AArch64::MLSv4i32;
7311 RC = &AArch64::FPR128RegClass;
7312 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7313 break;
7314
7316 Opc = AArch64::MLAv4i16_indexed;
7317 RC = &AArch64::FPR64RegClass;
7318 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7319 break;
7321 Opc = AArch64::MLAv4i16_indexed;
7322 RC = &AArch64::FPR64RegClass;
7323 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7324 break;
7326 Opc = AArch64::MLAv8i16_indexed;
7327 RC = &AArch64::FPR128RegClass;
7328 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7329 break;
7331 Opc = AArch64::MLAv8i16_indexed;
7332 RC = &AArch64::FPR128RegClass;
7333 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7334 break;
7336 Opc = AArch64::MLAv2i32_indexed;
7337 RC = &AArch64::FPR64RegClass;
7338 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7339 break;
7341 Opc = AArch64::MLAv2i32_indexed;
7342 RC = &AArch64::FPR64RegClass;
7343 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7344 break;
7346 Opc = AArch64::MLAv4i32_indexed;
7347 RC = &AArch64::FPR128RegClass;
7348 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7349 break;
7351 Opc = AArch64::MLAv4i32_indexed;
7352 RC = &AArch64::FPR128RegClass;
7353 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7354 break;
7355
7357 Opc = AArch64::MLAv4i16_indexed;
7358 RC = &AArch64::FPR64RegClass;
7359 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7360 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7361 RC);
7362 break;
7364 Opc = AArch64::MLSv4i16_indexed;
7365 RC = &AArch64::FPR64RegClass;
7366 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7367 break;
7369 Opc = AArch64::MLAv8i16_indexed;
7370 RC = &AArch64::FPR128RegClass;
7371 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7372 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7373 RC);
7374 break;
7376 Opc = AArch64::MLSv8i16_indexed;
7377 RC = &AArch64::FPR128RegClass;
7378 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7379 break;
7381 Opc = AArch64::MLAv2i32_indexed;
7382 RC = &AArch64::FPR64RegClass;
7383 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7384 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7385 RC);
7386 break;
7388 Opc = AArch64::MLSv2i32_indexed;
7389 RC = &AArch64::FPR64RegClass;
7390 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7391 break;
7393 Opc = AArch64::MLAv4i32_indexed;
7394 RC = &AArch64::FPR128RegClass;
7395 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7396 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7397 RC);
7398 break;
7400 Opc = AArch64::MLSv4i32_indexed;
7401 RC = &AArch64::FPR128RegClass;
7402 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7403 break;
7404
7405 // Floating Point Support
7407 Opc = AArch64::FMADDHrrr;
7408 RC = &AArch64::FPR16RegClass;
7409 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7410 break;
7412 Opc = AArch64::FMADDSrrr;
7413 RC = &AArch64::FPR32RegClass;
7414 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7415 break;
7417 Opc = AArch64::FMADDDrrr;
7418 RC = &AArch64::FPR64RegClass;
7419 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7420 break;
7421
7423 Opc = AArch64::FMADDHrrr;
7424 RC = &AArch64::FPR16RegClass;
7425 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7426 break;
7428 Opc = AArch64::FMADDSrrr;
7429 RC = &AArch64::FPR32RegClass;
7430 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7431 break;
7433 Opc = AArch64::FMADDDrrr;
7434 RC = &AArch64::FPR64RegClass;
7435 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7436 break;
7437
7439 Opc = AArch64::FMLAv1i32_indexed;
7440 RC = &AArch64::FPR32RegClass;
7441 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7442 FMAInstKind::Indexed);
7443 break;
7445 Opc = AArch64::FMLAv1i32_indexed;
7446 RC = &AArch64::FPR32RegClass;
7447 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7448 FMAInstKind::Indexed);
7449 break;
7450
7452 Opc = AArch64::FMLAv1i64_indexed;
7453 RC = &AArch64::FPR64RegClass;
7454 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7455 FMAInstKind::Indexed);
7456 break;
7458 Opc = AArch64::FMLAv1i64_indexed;
7459 RC = &AArch64::FPR64RegClass;
7460 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7461 FMAInstKind::Indexed);
7462 break;
7463
7465 RC = &AArch64::FPR64RegClass;
7466 Opc = AArch64::FMLAv4i16_indexed;
7467 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7468 FMAInstKind::Indexed);
7469 break;
7471 RC = &AArch64::FPR64RegClass;
7472 Opc = AArch64::FMLAv4f16;
7473 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7474 FMAInstKind::Accumulator);
7475 break;
7477 RC = &AArch64::FPR64RegClass;
7478 Opc = AArch64::FMLAv4i16_indexed;
7479 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7480 FMAInstKind::Indexed);
7481 break;
7483 RC = &AArch64::FPR64RegClass;
7484 Opc = AArch64::FMLAv4f16;
7485 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7486 FMAInstKind::Accumulator);
7487 break;
7488
7491 RC = &AArch64::FPR64RegClass;
7493 Opc = AArch64::FMLAv2i32_indexed;
7494 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7495 FMAInstKind::Indexed);
7496 } else {
7497 Opc = AArch64::FMLAv2f32;
7498 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7499 FMAInstKind::Accumulator);
7500 }
7501 break;
7504 RC = &AArch64::FPR64RegClass;
7506 Opc = AArch64::FMLAv2i32_indexed;
7507 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7508 FMAInstKind::Indexed);
7509 } else {
7510 Opc = AArch64::FMLAv2f32;
7511 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7512 FMAInstKind::Accumulator);
7513 }
7514 break;
7515
7517 RC = &AArch64::FPR128RegClass;
7518 Opc = AArch64::FMLAv8i16_indexed;
7519 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7520 FMAInstKind::Indexed);
7521 break;
7523 RC = &AArch64::FPR128RegClass;
7524 Opc = AArch64::FMLAv8f16;
7525 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7526 FMAInstKind::Accumulator);
7527 break;
7529 RC = &AArch64::FPR128RegClass;
7530 Opc = AArch64::FMLAv8i16_indexed;
7531 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7532 FMAInstKind::Indexed);
7533 break;
7535 RC = &AArch64::FPR128RegClass;
7536 Opc = AArch64::FMLAv8f16;
7537 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7538 FMAInstKind::Accumulator);
7539 break;
7540
7543 RC = &AArch64::FPR128RegClass;
7545 Opc = AArch64::FMLAv2i64_indexed;
7546 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7547 FMAInstKind::Indexed);
7548 } else {
7549 Opc = AArch64::FMLAv2f64;
7550 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7551 FMAInstKind::Accumulator);
7552 }
7553 break;
7556 RC = &AArch64::FPR128RegClass;
7558 Opc = AArch64::FMLAv2i64_indexed;
7559 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7560 FMAInstKind::Indexed);
7561 } else {
7562 Opc = AArch64::FMLAv2f64;
7563 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7564 FMAInstKind::Accumulator);
7565 }
7566 break;
7567
7570 RC = &AArch64::FPR128RegClass;
7572 Opc = AArch64::FMLAv4i32_indexed;
7573 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7574 FMAInstKind::Indexed);
7575 } else {
7576 Opc = AArch64::FMLAv4f32;
7577 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7578 FMAInstKind::Accumulator);
7579 }
7580 break;
7581
7584 RC = &AArch64::FPR128RegClass;
7586 Opc = AArch64::FMLAv4i32_indexed;
7587 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7588 FMAInstKind::Indexed);
7589 } else {
7590 Opc = AArch64::FMLAv4f32;
7591 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7592 FMAInstKind::Accumulator);
7593 }
7594 break;
7595
7597 Opc = AArch64::FNMSUBHrrr;
7598 RC = &AArch64::FPR16RegClass;
7599 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7600 break;
7602 Opc = AArch64::FNMSUBSrrr;
7603 RC = &AArch64::FPR32RegClass;
7604 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7605 break;
7607 Opc = AArch64::FNMSUBDrrr;
7608 RC = &AArch64::FPR64RegClass;
7609 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7610 break;
7611
7613 Opc = AArch64::FNMADDHrrr;
7614 RC = &AArch64::FPR16RegClass;
7615 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7616 break;
7618 Opc = AArch64::FNMADDSrrr;
7619 RC = &AArch64::FPR32RegClass;
7620 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7621 break;
7623 Opc = AArch64::FNMADDDrrr;
7624 RC = &AArch64::FPR64RegClass;
7625 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7626 break;
7627
7629 Opc = AArch64::FMSUBHrrr;
7630 RC = &AArch64::FPR16RegClass;
7631 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7632 break;
7634 Opc = AArch64::FMSUBSrrr;
7635 RC = &AArch64::FPR32RegClass;
7636 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7637 break;
7639 Opc = AArch64::FMSUBDrrr;
7640 RC = &AArch64::FPR64RegClass;
7641 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7642 break;
7643
7645 Opc = AArch64::FMLSv1i32_indexed;
7646 RC = &AArch64::FPR32RegClass;
7647 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7648 FMAInstKind::Indexed);
7649 break;
7650
7652 Opc = AArch64::FMLSv1i64_indexed;
7653 RC = &AArch64::FPR64RegClass;
7654 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7655 FMAInstKind::Indexed);
7656 break;
7657
7660 RC = &AArch64::FPR64RegClass;
7661 Register NewVR = MRI.createVirtualRegister(RC);
7662 MachineInstrBuilder MIB1 =
7663 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
7664 .add(Root.getOperand(2));
7665 InsInstrs.push_back(MIB1);
7666 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7668 Opc = AArch64::FMLAv4f16;
7669 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7670 FMAInstKind::Accumulator, &NewVR);
7671 } else {
7672 Opc = AArch64::FMLAv4i16_indexed;
7673 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7674 FMAInstKind::Indexed, &NewVR);
7675 }
7676 break;
7677 }
7679 RC = &AArch64::FPR64RegClass;
7680 Opc = AArch64::FMLSv4f16;
7681 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7682 FMAInstKind::Accumulator);
7683 break;
7685 RC = &AArch64::FPR64RegClass;
7686 Opc = AArch64::FMLSv4i16_indexed;
7687 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7688 FMAInstKind::Indexed);
7689 break;
7690
7693 RC = &AArch64::FPR64RegClass;
7695 Opc = AArch64::FMLSv2i32_indexed;
7696 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7697 FMAInstKind::Indexed);
7698 } else {
7699 Opc = AArch64::FMLSv2f32;
7700 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7701 FMAInstKind::Accumulator);
7702 }
7703 break;
7704
7707 RC = &AArch64::FPR128RegClass;
7708 Register NewVR = MRI.createVirtualRegister(RC);
7709 MachineInstrBuilder MIB1 =
7710 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
7711 .add(Root.getOperand(2));
7712 InsInstrs.push_back(MIB1);
7713 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7715 Opc = AArch64::FMLAv8f16;
7716 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7717 FMAInstKind::Accumulator, &NewVR);
7718 } else {
7719 Opc = AArch64::FMLAv8i16_indexed;
7720 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7721 FMAInstKind::Indexed, &NewVR);
7722 }
7723 break;
7724 }
7726 RC = &AArch64::FPR128RegClass;
7727 Opc = AArch64::FMLSv8f16;
7728 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7729 FMAInstKind::Accumulator);
7730 break;
7732 RC = &AArch64::FPR128RegClass;
7733 Opc = AArch64::FMLSv8i16_indexed;
7734 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7735 FMAInstKind::Indexed);
7736 break;
7737
7740 RC = &AArch64::FPR128RegClass;
7742 Opc = AArch64::FMLSv2i64_indexed;
7743 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7744 FMAInstKind::Indexed);
7745 } else {
7746 Opc = AArch64::FMLSv2f64;
7747 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7748 FMAInstKind::Accumulator);
7749 }
7750 break;
7751
7754 RC = &AArch64::FPR128RegClass;
7756 Opc = AArch64::FMLSv4i32_indexed;
7757 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7758 FMAInstKind::Indexed);
7759 } else {
7760 Opc = AArch64::FMLSv4f32;
7761 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7762 FMAInstKind::Accumulator);
7763 }
7764 break;
7767 RC = &AArch64::FPR64RegClass;
7768 Register NewVR = MRI.createVirtualRegister(RC);
7769 MachineInstrBuilder MIB1 =
7770 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
7771 .add(Root.getOperand(2));
7772 InsInstrs.push_back(MIB1);
7773 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7775 Opc = AArch64::FMLAv2i32_indexed;
7776 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7777 FMAInstKind::Indexed, &NewVR);
7778 } else {
7779 Opc = AArch64::FMLAv2f32;
7780 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7781 FMAInstKind::Accumulator, &NewVR);
7782 }
7783 break;
7784 }
7787 RC = &AArch64::FPR128RegClass;
7788 Register NewVR = MRI.createVirtualRegister(RC);
7789 MachineInstrBuilder MIB1 =
7790 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
7791 .add(Root.getOperand(2));
7792 InsInstrs.push_back(MIB1);
7793 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7795 Opc = AArch64::FMLAv4i32_indexed;
7796 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7797 FMAInstKind::Indexed, &NewVR);
7798 } else {
7799 Opc = AArch64::FMLAv4f32;
7800 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7801 FMAInstKind::Accumulator, &NewVR);
7802 }
7803 break;
7804 }
7807 RC = &AArch64::FPR128RegClass;
7808 Register NewVR = MRI.createVirtualRegister(RC);
7809 MachineInstrBuilder MIB1 =
7810 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
7811 .add(Root.getOperand(2));
7812 InsInstrs.push_back(MIB1);
7813 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7815 Opc = AArch64::FMLAv2i64_indexed;
7816 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7817 FMAInstKind::Indexed, &NewVR);
7818 } else {
7819 Opc = AArch64::FMLAv2f64;
7820 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7821 FMAInstKind::Accumulator, &NewVR);
7822 }
7823 break;
7824 }
7827 unsigned IdxDupOp =
7829 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
7830 &AArch64::FPR128RegClass, MRI);
7831 break;
7832 }
7835 unsigned IdxDupOp =
7837 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
7838 &AArch64::FPR128RegClass, MRI);
7839 break;
7840 }
7843 unsigned IdxDupOp =
7845 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
7846 &AArch64::FPR128_loRegClass, MRI);
7847 break;
7848 }
7851 unsigned IdxDupOp =
7853 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
7854 &AArch64::FPR128RegClass, MRI);
7855 break;
7856 }
7859 unsigned IdxDupOp =
7861 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
7862 &AArch64::FPR128_loRegClass, MRI);
7863 break;
7864 }
7866 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
7867 break;
7868 }
7869
7870 } // end switch (Pattern)
7871 // Record MUL and ADD/SUB for deletion
7872 if (MUL)
7873 DelInstrs.push_back(MUL);
7874 DelInstrs.push_back(&Root);
7875
7876 // Set the flags on the inserted instructions to be the merged flags of the
7877 // instructions that we have combined.
7878 uint32_t Flags = Root.getFlags();
7879 if (MUL)
7880 Flags = Root.mergeFlagsWith(*MUL);
7881 for (auto *MI : InsInstrs)
7882 MI->setFlags(Flags);
7883}
7884
7885/// Replace csinc-branch sequence by simple conditional branch
7886///
7887/// Examples:
7888/// 1. \code
7889/// csinc w9, wzr, wzr, <condition code>
7890/// tbnz w9, #0, 0x44
7891/// \endcode
7892/// to
7893/// \code
7894/// b.<inverted condition code>
7895/// \endcode
7896///
7897/// 2. \code
7898/// csinc w9, wzr, wzr, <condition code>
7899/// tbz w9, #0, 0x44
7900/// \endcode
7901/// to
7902/// \code
7903/// b.<condition code>
7904/// \endcode
7905///
7906/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
7907/// compare's constant operand is power of 2.
7908///
7909/// Examples:
7910/// \code
7911/// and w8, w8, #0x400
7912/// cbnz w8, L1
7913/// \endcode
7914/// to
7915/// \code
7916/// tbnz w8, #10, L1
7917/// \endcode
7918///
7919/// \param MI Conditional Branch
7920/// \return True when the simple conditional branch is generated
7921///
7923 bool IsNegativeBranch = false;
7924 bool IsTestAndBranch = false;
7925 unsigned TargetBBInMI = 0;
7926 switch (MI.getOpcode()) {
7927 default:
7928 llvm_unreachable("Unknown branch instruction?");
7929 case AArch64::Bcc:
7930 return false;
7931 case AArch64::CBZW:
7932 case AArch64::CBZX:
7933 TargetBBInMI = 1;
7934 break;
7935 case AArch64::CBNZW:
7936 case AArch64::CBNZX:
7937 TargetBBInMI = 1;
7938 IsNegativeBranch = true;
7939 break;
7940 case AArch64::TBZW:
7941 case AArch64::TBZX:
7942 TargetBBInMI = 2;
7943 IsTestAndBranch = true;
7944 break;
7945 case AArch64::TBNZW:
7946 case AArch64::TBNZX:
7947 TargetBBInMI = 2;
7948 IsNegativeBranch = true;
7949 IsTestAndBranch = true;
7950 break;
7951 }
7952 // So we increment a zero register and test for bits other
7953 // than bit 0? Conservatively bail out in case the verifier
7954 // missed this case.
7955 if (IsTestAndBranch && MI.getOperand(1).getImm())
7956 return false;
7957
7958 // Find Definition.
7959  assert(MI.getParent() && "Incomplete machine instruction\n");
7960 MachineBasicBlock *MBB = MI.getParent();
7961 MachineFunction *MF = MBB->getParent();
7963 Register VReg = MI.getOperand(0).getReg();
7964 if (!VReg.isVirtual())
7965 return false;
7966
7967 MachineInstr *DefMI = MRI->getVRegDef(VReg);
7968
7969 // Look through COPY instructions to find definition.
7970 while (DefMI->isCopy()) {
7971 Register CopyVReg = DefMI->getOperand(1).getReg();
7972 if (!MRI->hasOneNonDBGUse(CopyVReg))
7973 return false;
7974 if (!MRI->hasOneDef(CopyVReg))
7975 return false;
7976 DefMI = MRI->getVRegDef(CopyVReg);
7977 }
7978
7979 switch (DefMI->getOpcode()) {
7980 default:
7981 return false;
7982 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
7983 case AArch64::ANDWri:
7984 case AArch64::ANDXri: {
7985 if (IsTestAndBranch)
7986 return false;
7987 if (DefMI->getParent() != MBB)
7988 return false;
7989 if (!MRI->hasOneNonDBGUse(VReg))
7990 return false;
7991
7992 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
7994 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
7995 if (!isPowerOf2_64(Mask))
7996 return false;
7997
7999 Register NewReg = MO.getReg();
8000 if (!NewReg.isVirtual())
8001 return false;
8002
8003 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8004
8005 MachineBasicBlock &RefToMBB = *MBB;
8006 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
8007 DebugLoc DL = MI.getDebugLoc();
8008 unsigned Imm = Log2_64(Mask);
8009 unsigned Opc = (Imm < 32)
8010 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8011 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8012 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
8013 .addReg(NewReg)
8014 .addImm(Imm)
8015 .addMBB(TBB);
8016    // Register lives on to the TBZ/TBNZ now.
8017 MO.setIsKill(false);
8018
8019    // For bit numbers smaller than 32, we must use the 32-bit (W)
8020    // variant in all cases, because the 64-bit variant cannot
8021    // encode them.
8022    // Therefore, if the input register is 64-bit, we need to take its
8023    // 32-bit sub-register.
8024 if (!Is32Bit && Imm < 32)
8025 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
8026 MI.eraseFromParent();
8027 return true;
8028 }
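  // Illustrative example of the 64-bit case above (register names are
  // arbitrary):
  //   and  x8, x8, #0x8
  //   cbnz x8, L1
  // becomes
  //   tbnz w8, #3, L1
  // using the 32-bit sub-register, since bit numbers below 32 are only
  // encodable in the W form of TB(N)Z.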
8029 // Look for CSINC
8030 case AArch64::CSINCWr:
8031 case AArch64::CSINCXr: {
8032 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
8033 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
8034 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
8035 DefMI->getOperand(2).getReg() == AArch64::XZR))
8036 return false;
8037
8038 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
8039 return false;
8040
8042 // Convert only when the condition code is not modified between
8043 // the CSINC and the branch. The CC may be used by other
8044 // instructions in between.
8046 return false;
8047 MachineBasicBlock &RefToMBB = *MBB;
8048 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
8049 DebugLoc DL = MI.getDebugLoc();
8050 if (IsNegativeBranch)
8052 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
8053 MI.eraseFromParent();
8054 return true;
8055 }
8056 }
8057}
8058
8059std::pair<unsigned, unsigned>
8061 const unsigned Mask = AArch64II::MO_FRAGMENT;
8062 return std::make_pair(TF & Mask, TF & ~Mask);
8063}
8064
8067 using namespace AArch64II;
8068
8069 static const std::pair<unsigned, const char *> TargetFlags[] = {
8070 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8071 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8072 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8073 {MO_HI12, "aarch64-hi12"}};
8074 return ArrayRef(TargetFlags);
8075}
8076
8079 using namespace AArch64II;
8080
8081 static const std::pair<unsigned, const char *> TargetFlags[] = {
8082 {MO_COFFSTUB, "aarch64-coffstub"},
8083 {MO_GOT, "aarch64-got"},
8084 {MO_NC, "aarch64-nc"},
8085 {MO_S, "aarch64-s"},
8086 {MO_TLS, "aarch64-tls"},
8087 {MO_DLLIMPORT, "aarch64-dllimport"},
8088 {MO_PREL, "aarch64-prel"},
8089 {MO_TAGGED, "aarch64-tagged"},
8090 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8091 };
8092 return ArrayRef(TargetFlags);
8093}
8094
8097 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8098 {{MOSuppressPair, "aarch64-suppress-pair"},
8099 {MOStridedAccess, "aarch64-strided-access"}};
8100 return ArrayRef(TargetFlags);
8101}
8102
8103/// Constants defining how certain sequences should be outlined.
8104/// This encompasses how an outlined function should be called, and what kind of
8105/// frame should be emitted for that outlined function.
8106///
8107/// \p MachineOutlinerDefault implies that the function should be called with
8108/// a save and restore of LR to the stack.
8109///
8110/// That is,
8111///
8112/// I1 Save LR OUTLINED_FUNCTION:
8113/// I2 --> BL OUTLINED_FUNCTION I1
8114/// I3 Restore LR I2
8115/// I3
8116/// RET
8117///
8118/// * Call construction overhead: 3 (save + BL + restore)
8119/// * Frame construction overhead: 1 (ret)
8120/// * Requires stack fixups? Yes
8121///
8122/// \p MachineOutlinerTailCall implies that the function is being created from
8123/// a sequence of instructions ending in a return.
8124///
8125/// That is,
8126///
8127/// I1 OUTLINED_FUNCTION:
8128/// I2 --> B OUTLINED_FUNCTION I1
8129/// RET I2
8130/// RET
8131///
8132/// * Call construction overhead: 1 (B)
8133/// * Frame construction overhead: 0 (Return included in sequence)
8134/// * Requires stack fixups? No
8135///
8136/// \p MachineOutlinerNoLRSave implies that the function should be called using
8137/// a BL instruction, but doesn't require LR to be saved and restored. This
8138/// happens when LR is known to be dead.
8139///
8140/// That is,
8141///
8142/// I1 OUTLINED_FUNCTION:
8143/// I2 --> BL OUTLINED_FUNCTION I1
8144/// I3 I2
8145/// I3
8146/// RET
8147///
8148/// * Call construction overhead: 1 (BL)
8149/// * Frame construction overhead: 1 (RET)
8150/// * Requires stack fixups? No
8151///
8152/// \p MachineOutlinerThunk implies that the function is being created from
8153/// a sequence of instructions ending in a call. The outlined function is
8154/// called with a BL instruction, and the outlined function tail-calls the
8155/// original call destination.
8156///
8157/// That is,
8158///
8159/// I1 OUTLINED_FUNCTION:
8160/// I2 --> BL OUTLINED_FUNCTION I1
8161/// BL f I2
8162/// B f
8163/// * Call construction overhead: 1 (BL)
8164/// * Frame construction overhead: 0
8165/// * Requires stack fixups? No
8166///
8167/// \p MachineOutlinerRegSave implies that the function should be called with a
8168/// save and restore of LR to an available register. This allows us to avoid
8169/// stack fixups. Note that this outlining variant is compatible with the
8170/// NoLRSave case.
8171///
8172/// That is,
8173///
8174/// I1 Save LR OUTLINED_FUNCTION:
8175/// I2 --> BL OUTLINED_FUNCTION I1
8176/// I3 Restore LR I2
8177/// I3
8178/// RET
8179///
8180/// * Call construction overhead: 3 (save + BL + restore)
8181/// * Frame construction overhead: 1 (ret)
8182/// * Requires stack fixups? No
8184 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
8185 MachineOutlinerTailCall, /// Only emit a branch.
8186 MachineOutlinerNoLRSave, /// Emit a call and return.
8187 MachineOutlinerThunk, /// Emit a call and tail-call.
8188 MachineOutlinerRegSave /// Same as default, but save to a register.
8190
8194 UnsafeRegsDead = 0x8
8196
8198AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
8199 MachineFunction *MF = C.getMF();
8201 const AArch64RegisterInfo *ARI =
8202 static_cast<const AArch64RegisterInfo *>(&TRI);
8203 // Check if there is an available register across the sequence that we can
8204 // use.
8205 for (unsigned Reg : AArch64::GPR64RegClass) {
8206 if (!ARI->isReservedReg(*MF, Reg) &&
8207 Reg != AArch64::LR && // LR is not reserved, but don't use it.
8208 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
8209 Reg != AArch64::X17 && // Ditto for X17.
8210 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
8211 C.isAvailableInsideSeq(Reg, TRI))
8212 return Reg;
8213 }
8214 return Register();
8215}
8216
8217static bool
8219 const outliner::Candidate &b) {
8220 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8221 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8222
8223  return MFIa->shouldSignReturnAddress(false) ==
            MFIb->shouldSignReturnAddress(false) &&
8224        MFIa->shouldSignReturnAddress(true) ==
            MFIb->shouldSignReturnAddress(true);
8225}
8226
8227static bool
8229 const outliner::Candidate &b) {
8230 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8231 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8232
8233 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
8234}
8235
8237 const outliner::Candidate &b) {
8238 const AArch64Subtarget &SubtargetA =
8240 const AArch64Subtarget &SubtargetB =
8241 b.getMF()->getSubtarget<AArch64Subtarget>();
8242 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
8243}
8244
8245std::optional<outliner::OutlinedFunction>
8247 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
8248 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
8249
8250 unsigned SequenceSize = 0;
8251 for (auto &MI : FirstCand)
8252 SequenceSize += getInstSizeInBytes(MI);
8253
8254 unsigned NumBytesToCreateFrame = 0;
8255
8256 // We only allow outlining for functions having exactly matching return
8257 // address signing attributes, i.e., all share the same value for the
8258 // attribute "sign-return-address" and all share the same type of key they
8259 // are signed with.
8260  // Additionally we require all functions to simultaneously either support
8261 // v8.3a features or not. Otherwise an outlined function could get signed
8262 // using dedicated v8.3 instructions and a call from a function that doesn't
8263 // support v8.3 instructions would therefore be invalid.
8264 if (std::adjacent_find(
8265 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
8266 [](const outliner::Candidate &a, const outliner::Candidate &b) {
8267 // Return true if a and b are non-equal w.r.t. return address
8268 // signing or support of v8.3a features
8269 if (outliningCandidatesSigningScopeConsensus(a, b) &&
8270 outliningCandidatesSigningKeyConsensus(a, b) &&
8271 outliningCandidatesV8_3OpsConsensus(a, b)) {
8272 return false;
8273 }
8274 return true;
8275 }) != RepeatedSequenceLocs.end()) {
8276 return std::nullopt;
8277 }
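  // Illustrative example: mixing a candidate from a function built with
  // "sign-return-address"="all" with one from a function that does not sign
  // its return address (or that lacks the v8.3a PAuth instructions) fails
  // this consensus check, so no outlined function is created for the set.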
8278
8279  // Since at this point all candidates agree on their return address signing,
8280 // picking just one is fine. If the candidate functions potentially sign their
8281 // return addresses, the outlined function should do the same. Note that in
8282 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
8283 // not certainly true that the outlined function will have to sign its return
8284 // address but this decision is made later, when the decision to outline
8285 // has already been made.
8286 // The same holds for the number of additional instructions we need: On
8287 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
8288 // necessary. However, at this point we don't know if the outlined function
8289 // will have a RET instruction so we assume the worst.
8290 const TargetRegisterInfo &TRI = getRegisterInfo();
8291 // Performing a tail call may require extra checks when PAuth is enabled.
8292 // If PAuth is disabled, set it to zero for uniformity.
8293 unsigned NumBytesToCheckLRInTCEpilogue = 0;
8294 if (FirstCand.getMF()
8295 ->getInfo<AArch64FunctionInfo>()
8296 ->shouldSignReturnAddress(true)) {
8297    // One PAC and one AUT instruction
8298 NumBytesToCreateFrame += 8;
8299
8300 // PAuth is enabled - set extra tail call cost, if any.
8301 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod();
8302 NumBytesToCheckLRInTCEpilogue =
8304 // Checking the authenticated LR value may significantly impact
8305 // SequenceSize, so account for it for more precise results.
8306 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
8307 SequenceSize += NumBytesToCheckLRInTCEpilogue;
8308
8309  // We have to check if SP-modifying instructions would get outlined.
8310  // If so, we only allow outlining if SP is unchanged overall: matching
8311  // sub and add instructions are okay to outline, but all other SP
8312  // modifications are not.
8313 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
8314 int SPValue = 0;
8315 for (auto &MI : C) {
8316 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
8317 switch (MI.getOpcode()) {
8318 case AArch64::ADDXri:
8319 case AArch64::ADDWri:
8320 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8321 assert(MI.getOperand(2).isImm() &&
8322 "Expected operand to be immediate");
8323 assert(MI.getOperand(1).isReg() &&
8324 "Expected operand to be a register");
8325 // Check if the add just increments sp. If so, we search for
8326 // matching sub instructions that decrement sp. If not, the
8327 // modification is illegal
8328 if (MI.getOperand(1).getReg() == AArch64::SP)
8329 SPValue += MI.getOperand(2).getImm();
8330 else
8331 return true;
8332 break;
8333 case AArch64::SUBXri:
8334 case AArch64::SUBWri:
8335 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8336 assert(MI.getOperand(2).isImm() &&
8337 "Expected operand to be immediate");
8338 assert(MI.getOperand(1).isReg() &&
8339 "Expected operand to be a register");
8340 // Check if the sub just decrements sp. If so, we search for
8341 // matching add instructions that increment sp. If not, the
8342 // modification is illegal
8343 if (MI.getOperand(1).getReg() == AArch64::SP)
8344 SPValue -= MI.getOperand(2).getImm();
8345 else
8346 return true;
8347 break;
8348 default:
8349 return true;
8350 }
8351 }
8352 }
8353 if (SPValue)
8354 return true;
8355 return false;
8356 };
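  // Illustrative example of the rule above: a candidate containing the pair
  //   sub sp, sp, #16
  //   ...
  //   add sp, sp, #16
  // nets out to SPValue == 0 and stays, whereas an unmatched
  //   add sp, sp, #32
  // (or any other kind of SP write) marks the candidate illegal and it is
  // erased just below.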
8357 // Remove candidates with illegal stack modifying instructions
8358 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
8359
8360 // If the sequence doesn't have enough candidates left, then we're done.
8361 if (RepeatedSequenceLocs.size() < 2)
8362 return std::nullopt;
8363 }
8364
8365 // Properties about candidate MBBs that hold for all of them.
8366 unsigned FlagsSetInAll = 0xF;
8367
8368 // Compute liveness information for each candidate, and set FlagsSetInAll.
8369 for (outliner::Candidate &C : RepeatedSequenceLocs)
8370 FlagsSetInAll &= C.Flags;
8371
8372 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
8373
8374 // Helper lambda which sets call information for every candidate.
8375 auto SetCandidateCallInfo =
8376 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
8377 for (outliner::Candidate &C : RepeatedSequenceLocs)
8378 C.setCallInfo(CallID, NumBytesForCall);
8379 };
8380
8381 unsigned FrameID = MachineOutlinerDefault;
8382 NumBytesToCreateFrame += 4;
8383
8384 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
8385 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
8386 });
8387
8388 // We check to see if CFI Instructions are present, and if they are
8389 // we find the number of CFI Instructions in the candidates.
8390 unsigned CFICount = 0;
8391 for (auto &I : RepeatedSequenceLocs[0]) {
8392 if (I.isCFIInstruction())
8393 CFICount++;
8394 }
8395
8396 // We compare the number of found CFI Instructions to the number of CFI
8397 // instructions in the parent function for each candidate. We must check this
8398 // since if we outline one of the CFI instructions in a function, we have to
8399 // outline them all for correctness. If we do not, the address offsets will be
8400 // incorrect between the two sections of the program.
8401 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8402 std::vector<MCCFIInstruction> CFIInstructions =
8403 C.getMF()->getFrameInstructions();
8404
8405 if (CFICount > 0 && CFICount != CFIInstructions.size())
8406 return std::nullopt;
8407 }
8408
8409  // Returns true if an instruction is safe to fix up, false otherwise.
8410 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
8411 if (MI.isCall())
8412 return true;
8413
8414 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
8415 !MI.readsRegister(AArch64::SP, &TRI))
8416 return true;
8417
8418 // Any modification of SP will break our code to save/restore LR.
8419 // FIXME: We could handle some instructions which add a constant
8420 // offset to SP, with a bit more work.
8421 if (MI.modifiesRegister(AArch64::SP, &TRI))
8422 return false;
8423
8424 // At this point, we have a stack instruction that we might need to
8425 // fix up. We'll handle it if it's a load or store.
8426 if (MI.mayLoadOrStore()) {
8427 const MachineOperand *Base; // Filled with the base operand of MI.
8428 int64_t Offset; // Filled with the offset of MI.
8429 bool OffsetIsScalable;
8430
8431 // Does it allow us to offset the base operand and is the base the
8432 // register SP?
8433 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
8434 !Base->isReg() || Base->getReg() != AArch64::SP)
8435 return false;
8436
8437      // Fix-up code below assumes bytes.
8438 if (OffsetIsScalable)
8439 return false;
8440
8441 // Find the minimum/maximum offset for this instruction and check
8442 // if fixing it up would be in range.
8443 int64_t MinOffset,
8444 MaxOffset; // Unscaled offsets for the instruction.
8445 // The scale to multiply the offsets by.
8446 TypeSize Scale(0U, false), DummyWidth(0U, false);
8447 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
8448
8449 Offset += 16; // Update the offset to what it would be if we outlined.
8450 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
8451 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
8452 return false;
8453
8454 // It's in range, so we can outline it.
8455 return true;
8456 }
8457
8458 // FIXME: Add handling for instructions like "add x0, sp, #8".
8459
8460 // We can't fix it up, so don't outline it.
8461 return false;
8462 };
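  // Illustrative example of the fix-up check above: for "ldr x0, [sp, #8]"
  // the offset becomes 8 + 16 = 24 once LR is spilled in the outlined
  // function; 24 is still inside the unsigned scaled range of LDRXui
  // (0 .. 4095 * 8 bytes), so the instruction is considered safe to fix up.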
8463
8464 // True if it's possible to fix up each stack instruction in this sequence.
8465 // Important for frames/call variants that modify the stack.
8466 bool AllStackInstrsSafe = llvm::all_of(FirstCand, IsSafeToFixup);
8467
8468 // If the last instruction in any candidate is a terminator, then we should
8469 // tail call all of the candidates.
8470 if (RepeatedSequenceLocs[0].back().isTerminator()) {
8471 FrameID = MachineOutlinerTailCall;
8472 NumBytesToCreateFrame = 0;
8473 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
8474 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
8475 }
8476
8477 else if (LastInstrOpcode == AArch64::BL ||
8478 ((LastInstrOpcode == AArch64::BLR ||
8479 LastInstrOpcode == AArch64::BLRNoIP) &&
8480 !HasBTI)) {
8481 // FIXME: Do we need to check if the code after this uses the value of LR?
8482 FrameID = MachineOutlinerThunk;
8483 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
8484 SetCandidateCallInfo(MachineOutlinerThunk, 4);
8485 }
8486
8487 else {
8488 // We need to decide how to emit calls + frames. We can always emit the same
8489 // frame if we don't need to save to the stack. If we have to save to the
8490 // stack, then we need a different frame.
8491 unsigned NumBytesNoStackCalls = 0;
8492 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
8493
8494 // Check if we have to save LR.
8495 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8496 bool LRAvailable =
8497 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
8498 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
8499 : true;
8500 // If we have a noreturn caller, then we're going to be conservative and
8501 // say that we have to save LR. If we don't have a ret at the end of the
8502 // block, then we can't reason about liveness accurately.
8503 //
8504 // FIXME: We can probably do better than always disabling this in
8505 // noreturn functions by fixing up the liveness info.
8506 bool IsNoReturn =
8507 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
8508
8509 // Is LR available? If so, we don't need a save.
8510 if (LRAvailable && !IsNoReturn) {
8511 NumBytesNoStackCalls += 4;
8512 C.setCallInfo(MachineOutlinerNoLRSave, 4);
8513 CandidatesWithoutStackFixups.push_back(C);
8514 }
8515
8516 // Is an unused register available? If so, we won't modify the stack, so
8517 // we can outline with the same frame type as those that don't save LR.
8518 else if (findRegisterToSaveLRTo(C)) {
8519 NumBytesNoStackCalls += 12;
8520 C.setCallInfo(MachineOutlinerRegSave, 12);
8521 CandidatesWithoutStackFixups.push_back(C);
8522 }
8523
8524 // Is SP used in the sequence at all? If not, we don't have to modify
8525 // the stack, so we are guaranteed to get the same frame.
8526 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
8527 NumBytesNoStackCalls += 12;
8528 C.setCallInfo(MachineOutlinerDefault, 12);
8529 CandidatesWithoutStackFixups.push_back(C);
8530 }
8531
8532 // If we outline this, we need to modify the stack. Pretend we don't
8533 // outline this by saving all of its bytes.
8534 else {
8535 NumBytesNoStackCalls += SequenceSize;
8536 }
8537 }
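    // Illustrative accounting for the loop above: with three candidates where
    // LR is free in one and a spare GPR is available in the other two, the
    // tally is 4 + 12 + 12 = 28 bytes, which beats 3 * 12 = 36 for the
    // default (stack-fixup) variant, so the cheaper no-stack-fixup candidates
    // are kept by the comparison below.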
8538
8539 // If there are no places where we have to save LR, then note that we
8540 // don't have to update the stack. Otherwise, give every candidate the
8541 // default call type, as long as it's safe to do so.
8542 if (!AllStackInstrsSafe ||
8543 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
8544 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
8545 FrameID = MachineOutlinerNoLRSave;
8546 } else {
8547 SetCandidateCallInfo(MachineOutlinerDefault, 12);
8548
8549 // Bugzilla ID: 46767
8550 // TODO: Check if fixing up the stack more than once is safe so we can
8551 // outline these.
8552 //
8553 // An outline resulting in a caller that requires stack fixups at the
8554 // callsite to a callee that also requires stack fixups can happen when
8555 // there are no available registers at the candidate callsite for a
8556 // candidate that itself also has calls.
8557 //
8558 // In other words if function_containing_sequence in the following pseudo
8559 // assembly requires that we save LR at the point of the call, but there
8560 // are no available registers: in this case we save using SP and as a
8561      // result the SP offsets require stack fixups by multiples of 16.
8562 //
8563 // function_containing_sequence:
8564 // ...
8565 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8566 // call OUTLINED_FUNCTION_N
8567 // restore LR from SP
8568 // ...
8569 //
8570 // OUTLINED_FUNCTION_N:
8571 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8572 // ...
8573 // bl foo
8574 // restore LR from SP
8575 // ret
8576 //
8577 // Because the code to handle more than one stack fixup does not
8578 // currently have the proper checks for legality, these cases will assert
8579 // in the AArch64 MachineOutliner. This is because the code to do this
8580 // needs more hardening, testing, better checks that generated code is
8581      // legal, etc., and because it is only verified to handle a single pass of
8582 // stack fixup.
8583 //
8584 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
8585 // these cases until they are known to be handled. Bugzilla 46767 is
8586 // referenced in comments at the assert site.
8587 //
8588 // To avoid asserting (or generating non-legal code on noassert builds)
8589 // we remove all candidates which would need more than one stack fixup by
8590 // pruning the cases where the candidate has calls while also having no
8591 // available LR and having no available general purpose registers to copy
8592 // LR to (ie one extra stack save/restore).
8593 //
8594 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8595 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
8596 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
8597 return (llvm::any_of(C, IsCall)) &&
8598 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
8599 !findRegisterToSaveLRTo(C));
8600 });
8601 }
8602 }
8603
8604 // If we dropped all of the candidates, bail out here.
8605 if (RepeatedSequenceLocs.size() < 2) {
8606 RepeatedSequenceLocs.clear();
8607 return std::nullopt;
8608 }
8609 }
8610
8611 // Does every candidate's MBB contain a call? If so, then we might have a call
8612 // in the range.
8613 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8614 // Check if the range contains a call. These require a save + restore of the
8615 // link register.
8616 bool ModStackToSaveLR = false;
8617 if (std::any_of(FirstCand.begin(), std::prev(FirstCand.end()),
8618 [](const MachineInstr &MI) { return MI.isCall(); }))
8619 ModStackToSaveLR = true;
8620
8621 // Handle the last instruction separately. If this is a tail call, then the
8622 // last instruction is a call. We don't want to save + restore in this case.
8623 // However, it could be possible that the last instruction is a call without
8624 // it being valid to tail call this sequence. We should consider this as
8625 // well.
8626 else if (FrameID != MachineOutlinerThunk &&
8627 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
8628 ModStackToSaveLR = true;
8629
8630 if (ModStackToSaveLR) {
8631 // We can't fix up the stack. Bail out.
8632 if (!AllStackInstrsSafe) {
8633 RepeatedSequenceLocs.clear();
8634 return std::nullopt;
8635 }
8636
8637 // Save + restore LR.
8638 NumBytesToCreateFrame += 8;
8639 }
8640 }
8641
8642 // If we have CFI instructions, we can only outline if the outlined section
8643  // can be a tail call.
8644 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
8645 return std::nullopt;
8646
8647 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
8648 NumBytesToCreateFrame, FrameID);
8649}
8650
8652 Function &F, std::vector<outliner::Candidate> &Candidates) const {
8653 // If a bunch of candidates reach this point they must agree on their return
8654 // address signing. It is therefore enough to just consider the signing
8655 // behaviour of one of them
8656 const auto &CFn = Candidates.front().getMF()->getFunction();
8657
8658 // Since all candidates belong to the same module, just copy the
8659 // function-level attributes of an arbitrary function.
8660 if (CFn.hasFnAttribute("sign-return-address"))
8661 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
8662 if (CFn.hasFnAttribute("sign-return-address-key"))
8663 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
8664
8665 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
8666}
8667
8669 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
8670 const Function &F = MF.getFunction();
8671
8672 // Can F be deduplicated by the linker? If it can, don't outline from it.
8673 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
8674 return false;
8675
8676 // Don't outline from functions with section markings; the program could
8677 // expect that all the code is in the named section.
8678 // FIXME: Allow outlining from multiple functions with the same section
8679 // marking.
8680 if (F.hasSection())
8681 return false;
8682
8683 // Outlining from functions with redzones is unsafe since the outliner may
8684 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
8685 // outline from it.
8687 if (!AFI || AFI->hasRedZone().value_or(true))
8688 return false;
8689
8690 // FIXME: Teach the outliner to generate/handle Windows unwind info.
8692 return false;
8693
8694 // It's safe to outline from MF.
8695 return true;
8696}
8697
8700 unsigned &Flags) const {
8702 "Must track liveness!");
8704 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
8705 Ranges;
8706 // According to the AArch64 Procedure Call Standard, the following are
8707 // undefined on entry/exit from a function call:
8708 //
8709 // * Registers x16, x17, (and thus w16, w17)
8710 // * Condition codes (and thus the NZCV register)
8711 //
8712 // If any of these registers are used inside or live across an outlined
8713 // function, then they may be modified later, either by the compiler or
8714 // some other tool (like the linker).
8715 //
8716 // To avoid outlining in these situations, partition each block into ranges
8717 // where these registers are dead. We will only outline from those ranges.
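  // Illustrative example: in a block shaped like
  //   <instrs>            ; x16, x17 and nzcv all dead -> outlinable range
  //   mov  x16, x0        ; x16 becomes live here
  //   <instrs using x16>  ; excluded from every candidate range
  //   <instrs>            ; unsafe registers dead again -> new range
  // only the ranges where the unsafe registers are dead are returned below.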
8719 auto AreAllUnsafeRegsDead = [&LRU]() {
8720 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
8721 LRU.available(AArch64::NZCV);
8722 };
8723
8724 // We need to know if LR is live across an outlining boundary later on in
8725 // order to decide how we'll create the outlined call, frame, etc.
8726 //
8727 // It's pretty expensive to check this for *every candidate* within a block.
8728 // That's some potentially n^2 behaviour, since in the worst case, we'd need
8729 // to compute liveness from the end of the block for O(n) candidates within
8730 // the block.
8731 //
8732 // So, to improve the average case, let's keep track of liveness from the end
8733 // of the block to the beginning of *every outlinable range*. If we know that
8734 // LR is available in every range we could outline from, then we know that
8735 // we don't need to check liveness for any candidate within that range.
8736 bool LRAvailableEverywhere = true;
8737 // Compute liveness bottom-up.
8738 LRU.addLiveOuts(MBB);
8739 // Update flags that require info about the entire MBB.
8740 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
8741 if (MI.isCall() && !MI.isTerminator())
8742 Flags |= MachineOutlinerMBBFlags::HasCalls;
8743 };
8744 // Range: [RangeBegin, RangeEnd)
8745 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
8746 unsigned RangeLen;
8747 auto CreateNewRangeStartingAt =
8748 [&RangeBegin, &RangeEnd,
8749 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
8750 RangeBegin = NewBegin;
8751 RangeEnd = std::next(RangeBegin);
8752 RangeLen = 0;
8753 };
8754 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
8755 // At least one unsafe register is not dead. We do not want to outline at
8756 // this point. If it is long enough to outline from, save the range
8757 // [RangeBegin, RangeEnd).
8758 if (RangeLen > 1)
8759 Ranges.push_back(std::make_pair(RangeBegin, RangeEnd));
8760 };
8761 // Find the first point where all unsafe registers are dead.
8762 // FIND: <safe instr> <-- end of first potential range
8763 // SKIP: <unsafe def>
8764 // SKIP: ... everything between ...
8765 // SKIP: <unsafe use>
8766 auto FirstPossibleEndPt = MBB.instr_rbegin();
8767 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
8768 LRU.stepBackward(*FirstPossibleEndPt);
8769 // Update flags that impact how we outline across the entire block,
8770 // regardless of safety.
8771 UpdateWholeMBBFlags(*FirstPossibleEndPt);
8772 if (AreAllUnsafeRegsDead())
8773 break;
8774 }
8775 // If we exhausted the entire block, we have no safe ranges to outline.
8776 if (FirstPossibleEndPt == MBB.instr_rend())
8777 return Ranges;
8778 // Current range.
8779 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
8780  // FirstPossibleEndPt points to the first place where all unsafe registers
8781 // are dead (if there is any such point). Begin partitioning the MBB into
8782 // ranges.
8783 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
8784 LRU.stepBackward(MI);
8785 UpdateWholeMBBFlags(MI);
8786 if (!AreAllUnsafeRegsDead()) {
8787 SaveRangeIfNonEmpty();
8788 CreateNewRangeStartingAt(MI.getIterator());
8789 continue;
8790 }
8791 LRAvailableEverywhere &= LRU.available(AArch64::LR);
8792 RangeBegin = MI.getIterator();
8793 ++RangeLen;
8794 }
8795 // Above loop misses the last (or only) range. If we are still safe, then
8796 // let's save the range.
8797 if (AreAllUnsafeRegsDead())
8798 SaveRangeIfNonEmpty();
8799 if (Ranges.empty())
8800 return Ranges;
8801  // We found the ranges bottom-up, but the mapping expects them top-down.
8802  // Reverse the order.
8803 std::reverse(Ranges.begin(), Ranges.end());
8804 // If there is at least one outlinable range where LR is unavailable
8805 // somewhere, remember that.
8806 if (!LRAvailableEverywhere)
8807 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
8808 return Ranges;
8809}
8810
8813 unsigned Flags) const {
8814 MachineInstr &MI = *MIT;
8815 MachineBasicBlock *MBB = MI.getParent();
8816 MachineFunction *MF = MBB->getParent();
8818
8819 // Don't outline anything used for return address signing. The outlined
8820 // function will get signed later if needed
8821 switch (MI.getOpcode()) {
8822 case AArch64::PACM:
8823 case AArch64::PACIASP:
8824 case AArch64::PACIBSP:
8825 case AArch64::PACIASPPC:
8826 case AArch64::PACIBSPPC:
8827 case AArch64::AUTIASP:
8828 case AArch64::AUTIBSP:
8829 case AArch64::AUTIASPPCi:
8830 case AArch64::AUTIASPPCr:
8831 case AArch64::AUTIBSPPCi:
8832 case AArch64::AUTIBSPPCr:
8833 case AArch64::RETAA:
8834 case AArch64::RETAB:
8835 case AArch64::RETAASPPCi:
8836 case AArch64::RETAASPPCr:
8837 case AArch64::RETABSPPCi:
8838 case AArch64::RETABSPPCr:
8839 case AArch64::EMITBKEY:
8840 case AArch64::PAUTH_PROLOGUE:
8841 case AArch64::PAUTH_EPILOGUE:
8843 }
8844
8845 // Don't outline LOHs.
8846 if (FuncInfo->getLOHRelated().count(&MI))
8848
8849 // We can only outline these if we will tail call the outlined function, or
8850  // fix up the CFI offsets. Currently, CFI instructions are outlined only
8851  // when the outlined section is a tail call.
8852 //
8853 // FIXME: If the proper fixups for the offset are implemented, this should be
8854 // possible.
8855 if (MI.isCFIInstruction())
8857
8858 // Is this a terminator for a basic block?
8859 if (MI.isTerminator())
8860 // TargetInstrInfo::getOutliningType has already filtered out anything
8861 // that would break this, so we can allow it here.
8863
8864 // Make sure none of the operands are un-outlinable.
8865 for (const MachineOperand &MOP : MI.operands()) {
8866 // A check preventing CFI indices was here before, but only CFI
8867 // instructions should have those.
8868 assert(!MOP.isCFIIndex());
8869
8870 // If it uses LR or W30 explicitly, then don't touch it.
8871 if (MOP.isReg() && !MOP.isImplicit() &&
8872 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
8874 }
8875
8876  // Special cases for instructions that can always be outlined, but will fail
8877  // the later tests, e.g. ADRPs, which are PC-relative and may use LR, but can
8878  // always be outlined because they don't require a *specific* value to be in LR.
8879 if (MI.getOpcode() == AArch64::ADRP)
8881
8882 // If MI is a call we might be able to outline it. We don't want to outline
8883 // any calls that rely on the position of items on the stack. When we outline
8884 // something containing a call, we have to emit a save and restore of LR in
8885 // the outlined function. Currently, this always happens by saving LR to the
8886 // stack. Thus, if we outline, say, half the parameters for a function call
8887 // plus the call, then we'll break the callee's expectations for the layout
8888 // of the stack.
8889 //
8890 // FIXME: Allow calls to functions which construct a stack frame, as long
8891 // as they don't access arguments on the stack.
8892 // FIXME: Figure out some way to analyze functions defined in other modules.
8893 // We should be able to compute the memory usage based on the IR calling
8894 // convention, even if we can't see the definition.
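  // Illustrative classification for the call handling below: a direct
  // "bl callee" whose callee is known (same module) and provably uses no
  // stack may be outlined anywhere, while "blr x8" or a call to an unknown
  // callee is only outlinable as the final, tail-called instruction of a
  // candidate (LegalTerminator), and calls to "\01_mcount" are never
  // outlined.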
8895 if (MI.isCall()) {
8896 // Get the function associated with the call. Look at each operand and find
8897 // the one that represents the callee and get its name.
8898 const Function *Callee = nullptr;
8899 for (const MachineOperand &MOP : MI.operands()) {
8900 if (MOP.isGlobal()) {
8901 Callee = dyn_cast<Function>(MOP.getGlobal());
8902 break;
8903 }
8904 }
8905
8906 // Never outline calls to mcount. There isn't any rule that would require
8907 // this, but the Linux kernel's "ftrace" feature depends on it.
8908 if (Callee && Callee->getName() == "\01_mcount")
8910
8911 // If we don't know anything about the callee, assume it depends on the
8912 // stack layout of the caller. In that case, it's only legal to outline
8913 // as a tail-call. Explicitly list the call instructions we know about so we
8914 // don't get unexpected results with call pseudo-instructions.
8915 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
8916 if (MI.getOpcode() == AArch64::BLR ||
8917 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
8918 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
8919
8920 if (!Callee)
8921 return UnknownCallOutlineType;
8922
8923    // We have a function we have information about. Check if it's something
8924    // we can safely outline.
8925 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
8926
8927 // We don't know what's going on with the callee at all. Don't touch it.
8928 if (!CalleeMF)
8929 return UnknownCallOutlineType;
8930
8931 // Check if we know anything about the callee saves on the function. If we
8932 // don't, then don't touch it, since that implies that we haven't
8933 // computed anything about its stack frame yet.
8934 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
8935 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
8936 MFI.getNumObjects() > 0)
8937 return UnknownCallOutlineType;
8938
8939 // At this point, we can say that CalleeMF ought to not pass anything on the
8940 // stack. Therefore, we can outline it.
8942 }
8943
8944 // Don't touch the link register or W30.
8945 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
8946 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
8947 return outliner::InstrType::Illegal;
8948
8949 // Don't outline BTI instructions, because that will prevent the outlining
8950 // site from being indirectly callable.
8951 if (hasBTISemantics(MI))
8952 return outliner::InstrType::Illegal;
8953
8954 return outliner::InstrType::Legal;
8955}
8956
8957void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
8958 for (MachineInstr &MI : MBB) {
8959 const MachineOperand *Base;
8960 TypeSize Width(0, false);
8961 int64_t Offset;
8962 bool OffsetIsScalable;
8963
8964 // Is this a load or store with an immediate offset with SP as the base?
8965 if (!MI.mayLoadOrStore() ||
8966 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
8967 &RI) ||
8968 (Base->isReg() && Base->getReg() != AArch64::SP))
8969 continue;
8970
8971 // It is, so we have to fix it up.
8972 TypeSize Scale(0U, false);
8973 int64_t Dummy1, Dummy2;
8974
8976 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
8977 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
8978 assert(Scale != 0 && "Unexpected opcode!");
8979 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
8980
8981 // We've pushed the return address to the stack, so add 16 to the offset.
8982 // This is safe, since we already checked if it would overflow when we
8983 // checked if this instruction was legal to outline.
8984 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
8985 StackOffsetOperand.setImm(NewImm);
8986 }
8987}
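// Worked example (illustrative): assume the outlined body contains
//   ldr x0, [sp, #8]        ; LDRXui, Scale == 8, Offset == 8
// Because the outlined function saves LR with "str x30, [sp, #-16]!", every
// SP-relative byte offset grows by 16, so the rescaled immediate becomes
//   NewImm = (8 + 16) / 8 == 3   =>   ldr x0, [sp, #24]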
8988
8989 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
8990 const AArch64InstrInfo *TII,
8991 bool ShouldSignReturnAddr) {
8992 if (!ShouldSignReturnAddr)
8993 return;
8994
8995 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
8996 .setMIFlag(MachineInstr::FrameSetup);
8997 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
8998 TII->get(AArch64::PAUTH_EPILOGUE))
8999 .setMIFlag(MachineInstr::FrameDestroy);
9000}
9001
9002 void AArch64InstrInfo::buildOutlinedFrame(
9003 MachineBasicBlock &MBB, MachineFunction &MF,
9004 const outliner::OutlinedFunction &OF) const {
9005
9006 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
9007
9008 if (OF.FrameConstructionID == MachineOutlinerTailCall)
9009 FI->setOutliningStyle("Tail Call");
9010 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
9011 // For thunk outlining, rewrite the last instruction from a call to a
9012 // tail-call.
9013 MachineInstr *Call = &*--MBB.instr_end();
9014 unsigned TailOpcode;
9015 if (Call->getOpcode() == AArch64::BL) {
9016 TailOpcode = AArch64::TCRETURNdi;
9017 } else {
9018 assert(Call->getOpcode() == AArch64::BLR ||
9019 Call->getOpcode() == AArch64::BLRNoIP);
9020 TailOpcode = AArch64::TCRETURNriALL;
9021 }
9022 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
9023 .add(Call->getOperand(0))
9024 .addImm(0);
9025 MBB.insert(MBB.end(), TC);
9026 Call->eraseFromParent();
9027
9028 FI->setOutliningStyle("Thunk");
9029 }
9030
9031 bool IsLeafFunction = true;
9032
9033 // Is there a call in the outlined range?
9034 auto IsNonTailCall = [](const MachineInstr &MI) {
9035 return MI.isCall() && !MI.isReturn();
9036 };
9037
9038 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
9039 // Fix up the instructions in the range, since we're going to modify the
9040 // stack.
9041
9042 // Bugzilla ID: 46767
9043 // TODO: Check if fixing up twice is safe so we can outline these.
9044 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
9045 "Can only fix up stack references once");
9046 fixupPostOutline(MBB);
9047
9048 IsLeafFunction = false;
9049
9050 // LR has to be a live in so that we can save it.
9051 if (!MBB.isLiveIn(AArch64::LR))
9052 MBB.addLiveIn(AArch64::LR);
9053
9054 MachineBasicBlock::iterator It = MBB.begin();
9055 MachineBasicBlock::iterator Et = MBB.end();
9056
9057 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9058 OF.FrameConstructionID == MachineOutlinerThunk)
9059 Et = std::prev(MBB.end());
9060
9061 // Insert a save before the outlined region
9062 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9063 .addReg(AArch64::SP, RegState::Define)
9064 .addReg(AArch64::LR)
9065 .addReg(AArch64::SP)
9066 .addImm(-16);
9067 It = MBB.insert(It, STRXpre);
9068
9069 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
9070 const TargetSubtargetInfo &STI = MF.getSubtarget();
9071 const MCRegisterInfo *MRI = STI.getRegisterInfo();
9072 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
9073
9074 // Add a CFI saying the stack was moved 16 B down.
9075 int64_t StackPosEntry =
9076 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
9077 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9078 .addCFIIndex(StackPosEntry)
9079 .setMIFlag(MachineInstr::FrameSetup);
9080
9081 // Add a CFI saying that the LR that we want to find is now 16 B higher
9082 // than before.
9083 int64_t LRPosEntry = MF.addFrameInst(
9084 MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
9085 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9086 .addCFIIndex(LRPosEntry)
9087 .setMIFlag(MachineInstr::FrameSetup);
9088 }
9089
9090 // Insert a restore before the terminator for the function.
9091 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9092 .addReg(AArch64::SP, RegState::Define)
9093 .addReg(AArch64::LR, RegState::Define)
9094 .addReg(AArch64::SP)
9095 .addImm(16);
9096 Et = MBB.insert(Et, LDRXpost);
9097 }
9098
9099 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
9100
9101 // If this is a tail call outlined function, then there's already a return.
9102 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9103 OF.FrameConstructionID == MachineOutlinerThunk) {
9104 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9105 return;
9106 }
9107
9108 // It's not a tail call, so we have to insert the return ourselves.
9109
9110 // LR has to be a live in so that we can return to it.
9111 if (!MBB.isLiveIn(AArch64::LR))
9112 MBB.addLiveIn(AArch64::LR);
9113
9114 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
9115 .addReg(AArch64::LR);
9116 MBB.insert(MBB.end(), ret);
9117
9118 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9119
9120 FI->setOutliningStyle("Function");
9121
9122 // Did we have to modify the stack by saving the link register?
9123 if (IsLeafFunction)
9124 return;
9125
9126 // We modified the stack.
9127 // Walk over the basic block and fix up all the stack accesses.
9128 fixupPostOutline(MBB);
9129}
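// Shape of the frame built above for a non-tail-call, non-thunk candidate
// (illustrative sketch):
//   str x30, [sp, #-16]!      ; STRXpre save of LR, plus CFI for CFA/LR
//   ...outlined instructions...
//   ldr x30, [sp], #16        ; LDRXpost restore of LR
//   ret
// Tail-call and thunk candidates skip the save/restore and end in a tail call
// instead; PAUTH_PROLOGUE/PAUTH_EPILOGUE are added by signOutlinedFunction
// when return-address signing is required.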
9130
9131 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
9132 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
9133 MachineFunction &MF, outliner::Candidate &C) const {
9134
9135 // Are we tail calling?
9136 if (C.CallConstructionID == MachineOutlinerTailCall) {
9137 // If yes, then we can just branch to the label.
9138 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
9139 .addGlobalAddress(M.getNamedValue(MF.getName()))
9140 .addImm(0));
9141 return It;
9142 }
9143
9144 // Are we saving the link register?
9145 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
9146 C.CallConstructionID == MachineOutlinerThunk) {
9147 // No, so just insert the call.
9148 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9149 .addGlobalAddress(M.getNamedValue(MF.getName())));
9150 return It;
9151 }
9152
9153 // We want to return the spot where we inserted the call.
9154 MachineBasicBlock::iterator CallPt;
9155
9156 // Instructions for saving and restoring LR around the call instruction we're
9157 // going to insert.
9158 MachineInstr *Save;
9159 MachineInstr *Restore;
9160 // Can we save to a register?
9161 if (C.CallConstructionID == MachineOutlinerRegSave) {
9162 // FIXME: This logic should be sunk into a target-specific interface so that
9163 // we don't have to recompute the register.
9164 Register Reg = findRegisterToSaveLRTo(C);
9165 assert(Reg && "No callee-saved register available?");
9166
9167 // LR has to be a live in so that we can save it.
9168 if (!MBB.isLiveIn(AArch64::LR))
9169 MBB.addLiveIn(AArch64::LR);
9170
9171 // Save and restore LR from Reg.
9172 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
9173 .addReg(AArch64::XZR)
9174 .addReg(AArch64::LR)
9175 .addImm(0);
9176 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
9177 .addReg(AArch64::XZR)
9178 .addReg(Reg)
9179 .addImm(0);
9180 } else {
9181 // We have the default case. Save and restore from SP.
9182 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9183 .addReg(AArch64::SP, RegState::Define)
9184 .addReg(AArch64::LR)
9185 .addReg(AArch64::SP)
9186 .addImm(-16);
9187 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9188 .addReg(AArch64::SP, RegState::Define)
9189 .addReg(AArch64::LR, RegState::Define)
9190 .addReg(AArch64::SP)
9191 .addImm(16);
9192 }
9193
9194 It = MBB.insert(It, Save);
9195 It++;
9196
9197 // Insert the call.
9198 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9199 .addGlobalAddress(M.getNamedValue(MF.getName())));
9200 CallPt = It;
9201 It++;
9202
9203 It = MBB.insert(It, Restore);
9204 return CallPt;
9205}
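// Call shapes produced above, by candidate class (illustrative, using
// OUTLINED_FUNCTION as a stand-in for the generated name):
//   MachineOutlinerTailCall          b   OUTLINED_FUNCTION   (TCRETURNdi)
//   MachineOutlinerNoLRSave / Thunk  bl  OUTLINED_FUNCTION
//   MachineOutlinerRegSave           mov xN, x30; bl ...; mov x30, xN
//   default                          str x30, [sp, #-16]!; bl ...; ldr x30, [sp], #16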
9206
9208 MachineFunction &MF) const {
9209 return MF.getFunction().hasMinSize();
9210}
9211
9212 void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
9213 MachineBasicBlock::iterator Iter,
9214 DebugLoc &DL,
9215 bool AllowSideEffects) const {
9216 const MachineFunction &MF = *MBB.getParent();
9217 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
9218 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
9219
9220 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
9221 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
9222 } else if (STI.hasSVE()) {
9223 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
9224 .addImm(0)
9225 .addImm(0);
9226 } else {
9227 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
9228 .addImm(0);
9229 }
9230}
9231
9232std::optional<DestSourcePair>
9233 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
9234
9235 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
9236 // and zero immediate operands used as an alias for mov instruction.
9237 if (MI.getOpcode() == AArch64::ORRWrs &&
9238 MI.getOperand(1).getReg() == AArch64::WZR &&
9239 MI.getOperand(3).getImm() == 0x0 &&
9240 // Check that the w->w move is not a zero-extending w->x mov.
9241 (!MI.getOperand(0).getReg().isVirtual() ||
9242 MI.getOperand(0).getSubReg() == 0) &&
9243 (!MI.getOperand(0).getReg().isPhysical() ||
9244 MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
9245 AArch64::X0) == -1))
9246 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9247
9248 if (MI.getOpcode() == AArch64::ORRXrs &&
9249 MI.getOperand(1).getReg() == AArch64::XZR &&
9250 MI.getOperand(3).getImm() == 0x0)
9251 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9252
9253 return std::nullopt;
9254}
9255
9256std::optional<DestSourcePair>
9257 AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
9258 if (MI.getOpcode() == AArch64::ORRWrs &&
9259 MI.getOperand(1).getReg() == AArch64::WZR &&
9260 MI.getOperand(3).getImm() == 0x0)
9261 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9262 return std::nullopt;
9263}
9264
9265std::optional<RegImmPair>
9266 AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
9267 int Sign = 1;
9268 int64_t Offset = 0;
9269
9270 // TODO: Handle cases where Reg is a super- or sub-register of the
9271 // destination register.
9272 const MachineOperand &Op0 = MI.getOperand(0);
9273 if (!Op0.isReg() || Reg != Op0.getReg())
9274 return std::nullopt;
9275
9276 switch (MI.getOpcode()) {
9277 default:
9278 return std::nullopt;
9279 case AArch64::SUBWri:
9280 case AArch64::SUBXri:
9281 case AArch64::SUBSWri:
9282 case AArch64::SUBSXri:
9283 Sign *= -1;
9284 [[fallthrough]];
9285 case AArch64::ADDSWri:
9286 case AArch64::ADDSXri:
9287 case AArch64::ADDWri:
9288 case AArch64::ADDXri: {
9289 // TODO: Third operand can be global address (usually some string).
9290 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
9291 !MI.getOperand(2).isImm())
9292 return std::nullopt;
9293 int Shift = MI.getOperand(3).getImm();
9294 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
9295 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
9296 }
9297 }
9298 return RegImmPair{MI.getOperand(1).getReg(), Offset};
9299}
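// Worked example (illustrative): for "add x0, x1, #3, lsl #12" (ADDXri with
// Shift == 12), a query for x0 returns RegImmPair{x1, 3 << 12} == {x1, 12288};
// for "subs w0, w1, #4" (SUBSWri) it returns {w1, -4}.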
9300
9301/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
9302/// the destination register then, if possible, describe the value in terms of
9303/// the source register.
9304static std::optional<ParamLoadedValue>
9306 const TargetInstrInfo *TII,
9307 const TargetRegisterInfo *TRI) {
9308 auto DestSrc = TII->isCopyLikeInstr(MI);
9309 if (!DestSrc)
9310 return std::nullopt;
9311
9312 Register DestReg = DestSrc->Destination->getReg();
9313 Register SrcReg = DestSrc->Source->getReg();
9314
9315 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
9316
9317 // If the described register is the destination, just return the source.
9318 if (DestReg == DescribedReg)
9319 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9320
9321 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
9322 if (MI.getOpcode() == AArch64::ORRWrs &&
9323 TRI->isSuperRegister(DestReg, DescribedReg))
9324 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9325
9326 // We may need to describe the lower part of a ORRXrs move.
9327 if (MI.getOpcode() == AArch64::ORRXrs &&
9328 TRI->isSubRegister(DestReg, DescribedReg)) {
9329 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
9330 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
9331 }
9332
9333 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
9334 "Unhandled ORR[XW]rs copy case");
9335
9336 return std::nullopt;
9337}
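// Examples of the ORR-as-copy cases above (illustrative):
//   "mov w0, w1" (ORRWrs w0, wzr, w1, #0): describing w0 yields w1 directly,
//   and describing x0 also yields w1, since the 32-bit write zero-extends
//   into the full X register.
//   "mov x0, x1" (ORRXrs x0, xzr, x1, #0): describing w0 resolves to the
//   sub_32 part of the source, i.e. w1.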
9338
9339 bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
9340 // Functions cannot be split to different sections on AArch64 if they have
9341 // a red zone. This is because relaxing a cross-section branch may require
9342 // incrementing the stack pointer to spill a register, which would overwrite
9343 // the red zone.
9344 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
9345 return false;
9346
9348}
9349
9350 bool AArch64InstrInfo::isMBBSafeToSplitToCold(
9351 const MachineBasicBlock &MBB) const {
9352 // Asm Goto blocks can contain conditional branches to goto labels, which can
9353 // get moved out of range of the branch instruction.
9354 auto isAsmGoto = [](const MachineInstr &MI) {
9355 return MI.getOpcode() == AArch64::INLINEASM_BR;
9356 };
9357 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
9358 return false;
9359
9360 // Because jump tables are label-relative instead of table-relative, they all
9361 // must be in the same section or relocation fixup handling will fail.
9362
9363 // Check if MBB is a jump table target
9364 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
9365 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
9366 return llvm::is_contained(JTE.MBBs, &MBB);
9367 };
9368 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
9369 return false;
9370
9371 // Check if MBB contains a jump table lookup
9372 for (const MachineInstr &MI : MBB) {
9373 switch (MI.getOpcode()) {
9374 case TargetOpcode::G_BRJT:
9375 case AArch64::JumpTableDest32:
9376 case AArch64::JumpTableDest16:
9377 case AArch64::JumpTableDest8:
9378 return false;
9379 default:
9380 continue;
9381 }
9382 }
9383
9384 // MBB isn't a special case, so it's safe to be split to the cold section.
9385 return true;
9386}
9387
9388std::optional<ParamLoadedValue>
9389 AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
9390 Register Reg) const {
9391 const MachineFunction *MF = MI.getMF();
9392 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
9393 switch (MI.getOpcode()) {
9394 case AArch64::MOVZWi:
9395 case AArch64::MOVZXi: {
9396 // MOVZWi may be used for producing zero-extended 32-bit immediates in
9397 // 64-bit parameters, so we need to consider super-registers.
9398 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
9399 return std::nullopt;
9400
9401 if (!MI.getOperand(1).isImm())
9402 return std::nullopt;
9403 int64_t Immediate = MI.getOperand(1).getImm();
9404 int Shift = MI.getOperand(2).getImm();
9405 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
9406 nullptr);
9407 }
9408 case AArch64::ORRWrs:
9409 case AArch64::ORRXrs:
9410 return describeORRLoadedValue(MI, Reg, this, TRI);
9411 }
9412
9413 return TargetInstrInfo::describeLoadedValue(MI, Reg);
9414}
9415
9416 bool AArch64InstrInfo::isExtendLikelyToBeFolded(
9417 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
9418 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
9419 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
9420 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
9421
9422 // Anyexts are nops.
9423 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
9424 return true;
9425
9426 Register DefReg = ExtMI.getOperand(0).getReg();
9427 if (!MRI.hasOneNonDBGUse(DefReg))
9428 return false;
9429
9430 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
9431 // addressing mode.
9432 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
9433 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
9434}
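// Typical gMIR shape this treats as free (illustrative):
//   %ext:_(s64) = G_SEXT %idx:_(s32)
//   %addr:_(p0) = G_PTR_ADD %base, %ext(s64)
// A single-use extend feeding G_PTR_ADD is usually folded into an extended
// register addressing mode such as [base, wN, sxtw], so it costs nothing.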
9435
9436 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
9437 return get(Opc).TSFlags & AArch64::ElementSizeMask;
9438}
9439
9440bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
9441 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
9442}
9443
9444bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
9445 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
9446}
9447
9448unsigned int
9449 AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
9450 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
9451}
9452
9453bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
9454 unsigned Scale) const {
9455 if (Offset && Scale)
9456 return false;
9457
9458 // Check Reg + Imm
9459 if (!Scale) {
9460 // 9-bit signed offset
9461 if (isInt<9>(Offset))
9462 return true;
9463
9464 // 12-bit unsigned offset
9465 unsigned Shift = Log2_64(NumBytes);
9466 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
9467 // Must be a multiple of NumBytes (NumBytes is a power of 2)
9468 (Offset >> Shift) << Shift == Offset)
9469 return true;
9470 return false;
9471 }
9472
9473 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
9474 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
9475}
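// Worked example (illustrative), for an 8-byte access (NumBytes == 8):
//   Scale == 0, Offset == -256   -> legal (fits the signed 9-bit form)
//   Scale == 0, Offset == 32760  -> legal (32760 / 8 == 4095 <= 2^12 - 1, and
//                                   32760 is a multiple of 8)
//   Scale == 0, Offset == 32761  -> illegal (not a multiple of 8)
//   Scale == 8, Offset == 0      -> legal (reg + 8 * reg form)
//   Scale != 0 with Offset != 0  -> always illegal (rejected up front)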
9476
9477 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
9478 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
9479 return AArch64::BLRNoIP;
9480 else
9481 return AArch64::BLR;
9482}
9483
9484 MachineBasicBlock::iterator
9485 AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
9486 Register TargetReg, bool FrameSetup) const {
9487 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
9488 MachineBasicBlock &MBB = *MBBI->getParent();
9488
9490 MachineFunction &MF = *MBB.getParent();
9491 const AArch64InstrInfo *TII =
9492 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
9493 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
9494 DebugLoc DL = MBB.findDebugLoc(MBBI);
9495
9496 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
9497 MachineBasicBlock *LoopTestMBB =
9498 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9499 MF.insert(MBBInsertPoint, LoopTestMBB);
9500 MachineBasicBlock *LoopBodyMBB =
9501 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9502 MF.insert(MBBInsertPoint, LoopBodyMBB);
9503 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9504 MF.insert(MBBInsertPoint, ExitMBB);
9505 MachineInstr::MIFlag Flags =
9506 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
9507
9508 // LoopTest:
9509 // SUB SP, SP, #ProbeSize
9510 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
9511 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
9512
9513 // CMP SP, TargetReg
9514 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
9515 AArch64::XZR)
9516 .addReg(AArch64::SP)
9517 .addReg(TargetReg)
9518 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
9519 .setMIFlags(Flags);
9520
9521 // B.<Cond> LoopExit
9522 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
9523 .addImm(AArch64CC::LE)
9524 .addMBB(ExitMBB)
9525 .setMIFlags(Flags);
9526
9527 // STR XZR, [SP]
9528 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
9529 .addReg(AArch64::XZR)
9530 .addReg(AArch64::SP)
9531 .addImm(0)
9532 .setMIFlags(Flags);
9533
9534 // B loop
9535 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
9536 .addMBB(LoopTestMBB)
9537 .setMIFlags(Flags);
9538
9539 // LoopExit:
9540 // MOV SP, TargetReg
9541 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
9542 .addReg(TargetReg)
9543 .addImm(0)
9544 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
9545 .setMIFlags(Flags);
9546
9547 // LDR XZR, [SP]
9548 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
9549 .addReg(AArch64::XZR, RegState::Define)
9550 .addReg(AArch64::SP)
9551 .addImm(0)
9552 .setMIFlags(Flags);
9553
9554 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
9555 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
9556
9557 LoopTestMBB->addSuccessor(ExitMBB);
9558 LoopTestMBB->addSuccessor(LoopBodyMBB);
9559 LoopBodyMBB->addSuccessor(LoopTestMBB);
9560 MBB.addSuccessor(LoopTestMBB);
9561
9562 // Update liveins.
9563 if (MF.getRegInfo().reservedRegsFrozen()) {
9564 bool anyChange = false;
9565 do {
9566 anyChange = recomputeLiveIns(*ExitMBB) ||
9567 recomputeLiveIns(*LoopBodyMBB) ||
9568 recomputeLiveIns(*LoopTestMBB);
9569 } while (anyChange);
9570 ;
9571 }
9572
9573 return ExitMBB->begin();
9574}
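// Overall shape of the probing sequence emitted above (illustrative):
//   LoopTest: sub  sp, sp, #ProbeSize
//             cmp  sp, <TargetReg>
//             b.<cond> LoopExit        ; taken once SP has reached TargetReg
//   LoopBody: str  xzr, [sp]           ; probe the freshly allocated page
//             b    LoopTest
//   LoopExit: mov  sp, <TargetReg>
//             ldr  xzr, [sp]           ; probe the final top of stack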
9575
9576namespace {
9577class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
9578 MachineInstr *PredBranch;
9579 SmallVector<MachineOperand, 4> Cond;
9580
9581public:
9582 AArch64PipelinerLoopInfo(MachineInstr *PredBranch,
9583 const SmallVectorImpl<MachineOperand> &Cond)
9584 : PredBranch(PredBranch), Cond(Cond.begin(), Cond.end()) {}
9585
9586 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
9587 // Make the instructions for loop control be placed in stage 0.
9588 // The predecessors of PredBranch are considered by the caller.
9589 return MI == PredBranch;
9590 }
9591
9592 std::optional<bool> createTripCountGreaterCondition(
9593 int TC, MachineBasicBlock &MBB,
9594 SmallVectorImpl<MachineOperand> &CondParam) override {
9595 // A branch instruction will be inserted as "if (Cond) goto epilogue".
9596 // Cond is normalized for such use.
9597 // The predecessors of the branch are assumed to have already been inserted.
9598 CondParam = Cond;
9599 return {};
9600 }
9601
9602 void setPreheader(MachineBasicBlock *NewPreheader) override {}
9603
9604 void adjustTripCount(int TripCountAdjust) override {}
9605
9606 void disposed() override {}
9607};
9608} // namespace
9609
9610static bool isCompareAndBranch(unsigned Opcode) {
9611 switch (Opcode) {
9612 case AArch64::CBZW:
9613 case AArch64::CBZX:
9614 case AArch64::CBNZW:
9615 case AArch64::CBNZX:
9616 case AArch64::TBZW:
9617 case AArch64::TBZX:
9618 case AArch64::TBNZW:
9619 case AArch64::TBNZX:
9620 return true;
9621 }
9622 return false;
9623}
9624
9625std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
9626 AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
9627 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
9628 SmallVector<MachineOperand, 4> Cond;
9629 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
9630 return nullptr;
9631
9632 // Infinite loops are not supported
9633 if (TBB == LoopBB && FBB == LoopBB)
9634 return nullptr;
9635
9636 // Must be conditional branch
9637 if (FBB == nullptr)
9638 return nullptr;
9639
9640 assert((TBB == LoopBB || FBB == LoopBB) &&
9641 "The Loop must be a single-basic-block loop");
9642
9643 // Normalization for createTripCountGreaterCondition()
9644 if (TBB == LoopBB)
9645 reverseBranchCondition(Cond);
9646
9647 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
9648 const TargetRegisterInfo &TRI = getRegisterInfo();
9649
9650 // Find the immediate predecessor of the conditional branch
9651 MachineInstr *PredBranch = nullptr;
9652 if (CondBranch->getOpcode() == AArch64::Bcc) {
9653 for (MachineInstr &MI : reverse(*LoopBB)) {
9654 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
9655 PredBranch = &MI;
9656 break;
9657 }
9658 }
9659 if (!PredBranch)
9660 return nullptr;
9661 } else if (isCompareAndBranch(CondBranch->getOpcode())) {
9662 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
9663 Register Reg = CondBranch->getOperand(0).getReg();
9664 if (!Reg.isVirtual())
9665 return nullptr;
9666 PredBranch = MRI.getVRegDef(Reg);
9667
9668 // MachinePipeliner does not expect that the immediate predecessor is a Phi
9669 if (PredBranch->isPHI())
9670 return nullptr;
9671
9672 if (PredBranch->getParent() != LoopBB)
9673 return nullptr;
9674 } else {
9675 return nullptr;
9676 }
9677
9678 return std::make_unique<AArch64PipelinerLoopInfo>(PredBranch, Cond);
9679}
9680
9681#define GET_INSTRINFO_HELPERS
9682#define GET_INSTRMAP_INFO
9683#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &Patterns)
Find other MI combine patterns.
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool isADDSRegImm(unsigned Opcode)
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCompareAndBranch(unsigned Opcode)
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
@ AK_Write
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static unsigned getBranchDisplacementBits(unsigned Opc)
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &Patterns)
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static void appendVGScaledOffsetExpr(SmallVectorImpl< char > &Expr, int NumBytes, int NumVGScaledBytes, unsigned VG, llvm::raw_string_ostream &Comment)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ HasCalls
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &Patterns)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &Patterns)
Floating-Point Support.
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &Patterns)
Find instructions that can be turned into madd.
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
const SetOfInstructions & getLOHRelated() const
bool needsDwarfUnwindInfo(const MachineFunction &MF) const
void setOutliningStyle(std::string Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, int64_t &NumDataVectors)
Returns the offset in parts to which this frame offset can be decomposed for the purpose of describin...
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
bool isThroughputPattern(MachineCombinerPattern Pattern) const override
Return true when a code sequence can improve throughput.
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
uint64_t getElementSizeForOpcode(unsigned Opc) const
Returns the vector element size (B, H, S or D) of an SVE opcode.
outliner::InstrType getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool isWhileOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE WHILE## instruction.
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
static bool isSEHInstruction(const MachineInstr &MI)
Return true if the instructions is a SEH instruciton used for unwinding on Windows.
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
SmallVector< std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > > getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool useMachineCombiner() const override
AArch64 supports MachineCombiner.
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const override
static bool isFalkorShiftExtFast(const MachineInstr &MI)
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
void genAlternativeCodeSequence(MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
bool expandPostRAPseudo(MachineInstr &MI) const override
unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
static bool isFpOrNEON(const MachineInstr &MI)
Returns whether the instruction is FP or NEON.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
bool isFunctionSafeToSplit(const MachineFunction &MF) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
Return true when Inst is associative and commutative so that it can be reassociated.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
std::optional< outliner::OutlinedFunction > getOutliningCandidateInfo(std::vector< outliner::Candidate > &RepeatedSequenceLocs) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset, unsigned Scale) const
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isPTestLikeOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE instruction that sets the condition codes as if it's results...
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized)
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
const AArch64RegisterInfo * getRegisterInfo() const override
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:680
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:677
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:799
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:583
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:556
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:541
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:647
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Definition: MCInstBuilder.h:43
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:81
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MBBSectionID getSectionID() const
Returns the section ID of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
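These MachineInstrBuilder helpers are normally chained off BuildMI() (declared further down in this index). A minimal sketch of such a chain, assuming MBB, MBBI, DL, TII, DestReg, SrcReg and Imm are in scope; it emits an ADDXri before MBBI and tags it as frame setup:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), DestReg)
    .addReg(SrcReg)
    .addImm(Imm)                          // unsigned 12-bit immediate
    .addImm(0)                            // LSL #0 shifter operand
    .setMIFlag(MachineInstr::FrameSetup);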
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:544
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:327
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:377
uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isFullCopy() const
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:757
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:473
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:554
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:372
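The MachineInstr queries above compose in the obvious way. A minimal, illustrative sketch, assuming MI is some instruction under inspection: an identity COPY with no sub-registers does nothing and can be erased.
if (MI.isFullCopy() &&
    MI.getOperand(0).getReg() == MI.getOperand(1).getReg())
  MI.eraseFromParent();                   // self-copy is a no-op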
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
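A minimal sketch of the MachineOperand accessors, assuming MI is in scope and Idx is a hypothetical operand index: it adjusts an immediate in place and clears a kill flag on a register operand.
MachineOperand &MO = MI.getOperand(Idx);
if (MO.isImm())
  MO.setImm(MO.getImm() + 4);             // rewrite the immediate in place
else if (MO.isReg())
  MO.setIsKill(false);                    // the register stays live past MI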
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
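A minimal MachineRegisterInfo sketch, assuming MF is the current MachineFunction and SomeVReg is a hypothetical virtual register:
MachineRegisterInfo &MRI = MF.getRegInfo();
Register Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
MachineInstr *Def = MRI.getUniqueVRegDef(SomeVReg);
bool DefinedByCopy = Def && Def->isCopy(); // Def is nullptr if there is no unique definition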
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Represents a location in source code.
Definition: SMLoc.h:23
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
int64_t getScalable() const
Returns the scalable component of the stack.
Definition: TypeSize.h:52
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition: TypeSize.h:44
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
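StackOffset keeps the fixed and scalable (vscale-scaled) parts separate, which is what allows SVE stack slots to be addressed alongside ordinary ones. A minimal sketch:
StackOffset Off = StackOffset::get(/*Fixed=*/32, /*Scalable=*/2);
int64_t FixedBytes = Off.getFixed();      // 32 bytes
int64_t VGUnits    = Off.getScalable();   // 2, scaled by vscale at runtime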
MI-level Statepoint operands.
Definition: StackMaps.h:158
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition: StackMaps.h:207
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual void genAlternativeCodeSequence(MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:333
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
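A minimal sketch of the AArch64_AM encoding helpers above; 0x00FF00FF00FF00FF is a valid logical immediate (a repeating 16-bit pattern), so the round trip is lossless:
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0x00FF00FF00FF00FFULL, 64);
uint64_t Val = AArch64_AM::decodeLogicalImmediate(Enc, 64);   // back to the pattern
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
unsigned Amount  = AArch64_AM::getShiftValue(Shifter);        // == 12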
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
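A minimal sketch of expandMOVImm, assuming the AArch64_IMM helpers from AArch64ExpandImm.h; the exact sequence it picks (MOVZ/MOVN plus MOVKs, or a logical-immediate ORR) depends on the constant:
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(0x123456789ABCDEF0ULL, 64, Insn);
// Insn now describes the opcode/operand steps needed to materialize the constant.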
static const uint64_t InstrFlagIsWhile
static constexpr unsigned SVEMaxBitsPerVector
static const uint64_t InstrFlagIsPTestLike
static constexpr unsigned SVEBitsPerBlock
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
static bool isCondBranchOpcode(int Opc)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
static bool isIndirectBranchOpcode(int Opc)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:269
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:319
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
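A minimal sketch of emitFrameOffset, assuming MBB, MBBI, DL and TII are in scope; it materializes SP plus 32 fixed bytes plus 2 vscale-scaled units into X9, relying on the defaulted trailing parameters:
emitFrameOffset(MBB, MBBI, DL, AArch64::X9, AArch64::SP,
                StackOffset::get(/*Fixed=*/32, /*Scalable=*/2), TII);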
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
MachineCombinerPattern
These are instruction patterns matched by the machine combiner pass.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
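These helpers turn a boolean into the corresponding operand flag bits for addReg(). A minimal sketch, assuming MIB, SrcReg and IsKill exist in the surrounding code:
MIB.addReg(SrcReg, getKillRegState(IsKill));                   // kill only if IsKill
MIB.addReg(AArch64::NZCV, RegState::Implicit | getDefRegState(true));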
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:233
DWARFExpression::Operation Op
static bool isUncondBranchOpcode(int Opc)
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2060
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
static const MachineMemOperand::Flags MOSuppressPair
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
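A worked example of the two integer helpers above: SignExtend64 treats the low B bits as a signed value, and isIntN then checks whether the result fits a narrower field.
int64_t Imm  = SignExtend64<12>(0xFFF);   // all 12 bits set -> -1
bool    Fits = isIntN(9, Imm);            // true: -1 fits in 9 signed bits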
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
static const MachineMemOperand::Flags MOStridedAccess
@ Default
The result values are uniform if and only if all operands are uniform.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static bool recomputeLiveIns(MachineBasicBlock &MBB)
Convenience function for recomputing live-ins for an MBB.
Definition: LivePhysRegs.h:198
Description of the encoding of one expression Op.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
static const MBBSectionID ColdSectionID
MachineJumpTableEntry - One jump table in the jump table info.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.