1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
17#include "AArch64PointerAuth.h"
18#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
40#include "llvm/IR/DebugLoc.h"
41#include "llvm/IR/GlobalValue.h"
42#include "llvm/MC/MCAsmInfo.h"
43#include "llvm/MC/MCInst.h"
45#include "llvm/MC/MCInstrDesc.h"
50#include "llvm/Support/LEB128.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57#include <utility>
58
59using namespace llvm;
60
61#define GET_INSTRINFO_CTOR_DTOR
62#include "AArch64GenInstrInfo.inc"
63
65 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
66 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
67
69 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
70 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
71
72static cl::opt<unsigned>
73 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
74 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
75
76static cl::opt<unsigned>
77 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
78 cl::desc("Restrict range of B instructions (DEBUG)"));
79
80AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
81 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
82 AArch64::CATCHRET),
83 RI(STI.getTargetTriple()), Subtarget(STI) {}
84
85/// GetInstSize - Return the number of bytes of code the specified
86/// instruction may be. This returns the maximum number of bytes.
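/// Most AArch64 instructions are a fixed 4 bytes; the cases below only handle
/// variable-size constructs (inline asm, bundles, STACKMAP/PATCHPOINT/
/// STATEPOINT shadows, the XRay patchable pseudos and SPACE).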
87unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
88 const MachineBasicBlock &MBB = *MI.getParent();
89 const MachineFunction *MF = MBB.getParent();
90 const Function &F = MF->getFunction();
91 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
92
93 {
94 auto Op = MI.getOpcode();
95 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
96 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
97 }
98
99 // Meta-instructions emit no code.
100 if (MI.isMetaInstruction())
101 return 0;
102
103 // FIXME: We currently only handle pseudoinstructions that don't get expanded
104 // before the assembly printer.
105 unsigned NumBytes = 0;
106 const MCInstrDesc &Desc = MI.getDesc();
107
108 // Size should be preferably set in
109 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
110 // Specific cases handle instructions of variable sizes
111 switch (Desc.getOpcode()) {
112 default:
113 if (Desc.getSize())
114 return Desc.getSize();
115
116 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
117 // with fixed constant size but not specified in .td file) is a normal
118 // 4-byte insn.
119 NumBytes = 4;
120 break;
121 case TargetOpcode::STACKMAP:
122 // The upper bound for a stackmap intrinsic is the full length of its shadow
123 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
124 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
125 break;
126 case TargetOpcode::PATCHPOINT:
127 // The size of the patchpoint intrinsic is the number of bytes requested
128 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
129 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
130 break;
131 case TargetOpcode::STATEPOINT:
132 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
133 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
134 // No patch bytes means a normal call inst is emitted
135 if (NumBytes == 0)
136 NumBytes = 4;
137 break;
138 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
139 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
140 // instructions are expanded to the specified number of NOPs. Otherwise,
141 // they are expanded to 36-byte XRay sleds.
142 NumBytes =
143 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
144 break;
145 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
146 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
147 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
148 NumBytes = 36;
149 break;
150 case TargetOpcode::PATCHABLE_EVENT_CALL:
151 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
152 NumBytes = 24;
153 break;
154
155 case AArch64::SPACE:
156 NumBytes = MI.getOperand(1).getImm();
157 break;
158 case TargetOpcode::BUNDLE:
159 NumBytes = getInstBundleLength(MI);
160 break;
161 }
162
163 return NumBytes;
164}
165
166unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
167 unsigned Size = 0;
168 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
169 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
170 while (++I != E && I->isInsideBundle()) {
171 assert(!I->isBundle() && "No nested bundle!");
172 Size += getInstSizeInBytes(*I);
173 }
174 return Size;
175}
176
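// Note on the Cond encoding built below: for Bcc the vector is just [cc];
// for CB(N)Z it is [-1, opcode, reg]; for TB(N)Z it is [-1, opcode, reg, bit].
// reverseBranchCondition() and insertSelect() rely on this layout.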
177static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
178 SmallVectorImpl<MachineOperand> &Cond) {
179 // Block ends with fall-through condbranch.
180 switch (LastInst->getOpcode()) {
181 default:
182 llvm_unreachable("Unknown branch instruction?");
183 case AArch64::Bcc:
184 Target = LastInst->getOperand(1).getMBB();
185 Cond.push_back(LastInst->getOperand(0));
186 break;
187 case AArch64::CBZW:
188 case AArch64::CBZX:
189 case AArch64::CBNZW:
190 case AArch64::CBNZX:
191 Target = LastInst->getOperand(1).getMBB();
192 Cond.push_back(MachineOperand::CreateImm(-1));
193 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
194 Cond.push_back(LastInst->getOperand(0));
195 break;
196 case AArch64::TBZW:
197 case AArch64::TBZX:
198 case AArch64::TBNZW:
199 case AArch64::TBNZX:
200 Target = LastInst->getOperand(2).getMBB();
201 Cond.push_back(MachineOperand::CreateImm(-1));
202 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
203 Cond.push_back(LastInst->getOperand(0));
204 Cond.push_back(LastInst->getOperand(1));
205 }
206}
207
208static unsigned getBranchDisplacementBits(unsigned Opc) {
209 switch (Opc) {
210 default:
211 llvm_unreachable("unexpected opcode!");
212 case AArch64::B:
213 return BDisplacementBits;
214 case AArch64::TBNZW:
215 case AArch64::TBZW:
216 case AArch64::TBNZX:
217 case AArch64::TBZX:
218 return TBZDisplacementBits;
219 case AArch64::CBNZW:
220 case AArch64::CBZW:
221 case AArch64::CBNZX:
222 case AArch64::CBZX:
223 return CBZDisplacementBits;
224 case AArch64::Bcc:
225 return BCCDisplacementBits;
226 }
227}
228
229bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
230 int64_t BrOffset) const {
231 unsigned Bits = getBranchDisplacementBits(BranchOp);
232 assert(Bits >= 3 && "max branch displacement must be enough to jump "
233 "over conditional branch expansion");
234 return isIntN(Bits, BrOffset / 4);
235}
236
237MachineBasicBlock *
238AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
239 switch (MI.getOpcode()) {
240 default:
241 llvm_unreachable("unexpected opcode!");
242 case AArch64::B:
243 return MI.getOperand(0).getMBB();
244 case AArch64::TBZW:
245 case AArch64::TBNZW:
246 case AArch64::TBZX:
247 case AArch64::TBNZX:
248 return MI.getOperand(2).getMBB();
249 case AArch64::CBZW:
250 case AArch64::CBNZW:
251 case AArch64::CBZX:
252 case AArch64::CBNZX:
253 case AArch64::Bcc:
254 return MI.getOperand(1).getMBB();
255 }
256}
257
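// Strategy used below: if X16 is free, emit a plain B and let the linker
// insert a range-extension thunk (which may clobber X16); otherwise, if a
// GPR can be scavenged and the extra code size is acceptable, materialize the
// destination with ADRP+ADD and branch through it; as a last resort, spill
// X16 around the branch so the linker thunk can still use it.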
258void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
259 MachineBasicBlock &NewDestBB,
260 MachineBasicBlock &RestoreBB,
261 const DebugLoc &DL,
262 int64_t BrOffset,
263 RegScavenger *RS) const {
264 assert(RS && "RegScavenger required for long branching");
265 assert(MBB.empty() &&
266 "new block should be inserted for expanding unconditional branch");
267 assert(MBB.pred_size() == 1);
268 assert(RestoreBB.empty() &&
269 "restore block should be inserted for restoring clobbered registers");
270
271 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
272 // Offsets outside of the signed 33-bit range are not supported for ADRP +
273 // ADD.
274 if (!isInt<33>(BrOffset))
275 report_fatal_error(
276 "Branch offsets outside of the signed 33-bit range not supported");
277
278 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
279 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
280 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
281 .addReg(Reg)
282 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
283 .addImm(0);
284 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
285 };
286
287 RS->enterBasicBlockEnd(MBB);
288 // If X16 is unused, we can rely on the linker to insert a range extension
289 // thunk if NewDestBB is out of range of a single B instruction.
290 constexpr Register Reg = AArch64::X16;
291 if (!RS->isRegUsed(Reg)) {
292 insertUnconditionalBranch(MBB, &NewDestBB, DL);
293 RS->setRegUsed(Reg);
294 return;
295 }
296
297 // If there's a free register and it's worth inflating the code size,
298 // manually insert the indirect branch.
299 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
300 if (Scavenged != AArch64::NoRegister &&
302 buildIndirectBranch(Scavenged, NewDestBB);
303 RS->setRegUsed(Scavenged);
304 return;
305 }
306
307 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
308 // with red zones.
309 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
310 if (!AFI || AFI->hasRedZone().value_or(true))
311 report_fatal_error(
312 "Unable to insert indirect branch inside function that has red zone");
313
314 // Otherwise, spill X16 and defer range extension to the linker.
315 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
316 .addReg(AArch64::SP, RegState::Define)
317 .addReg(Reg)
318 .addReg(AArch64::SP)
319 .addImm(-16);
320
321 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
322
323 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
324 .addReg(AArch64::SP, RegState::Define)
325 .addReg(Reg, RegState::Define)
326 .addReg(AArch64::SP)
327 .addImm(16);
328}
329
330// Branch analysis.
331bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
332 MachineBasicBlock *&TBB,
333 MachineBasicBlock *&FBB,
334 SmallVectorImpl<MachineOperand> &Cond,
335 bool AllowModify) const {
336 // If the block has no terminators, it just falls into the block after it.
337 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
338 if (I == MBB.end())
339 return false;
340
341 // Skip over SpeculationBarrierEndBB terminators
342 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
343 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
344 --I;
345 }
346
347 if (!isUnpredicatedTerminator(*I))
348 return false;
349
350 // Get the last instruction in the block.
351 MachineInstr *LastInst = &*I;
352
353 // If there is only one terminator instruction, process it.
354 unsigned LastOpc = LastInst->getOpcode();
355 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
356 if (isUncondBranchOpcode(LastOpc)) {
357 TBB = LastInst->getOperand(0).getMBB();
358 return false;
359 }
360 if (isCondBranchOpcode(LastOpc)) {
361 // Block ends with fall-through condbranch.
362 parseCondBranch(LastInst, TBB, Cond);
363 return false;
364 }
365 return true; // Can't handle indirect branch.
366 }
367
368 // Get the instruction before it if it is a terminator.
369 MachineInstr *SecondLastInst = &*I;
370 unsigned SecondLastOpc = SecondLastInst->getOpcode();
371
372 // If AllowModify is true and the block ends with two or more unconditional
373 // branches, delete all but the first unconditional branch.
374 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
375 while (isUncondBranchOpcode(SecondLastOpc)) {
376 LastInst->eraseFromParent();
377 LastInst = SecondLastInst;
378 LastOpc = LastInst->getOpcode();
379 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
380 // Return now; the only terminator is an unconditional branch.
381 TBB = LastInst->getOperand(0).getMBB();
382 return false;
383 }
384 SecondLastInst = &*I;
385 SecondLastOpc = SecondLastInst->getOpcode();
386 }
387 }
388
389 // If we're allowed to modify and the block ends in an unconditional branch
390 // which could simply fallthrough, remove the branch. (Note: This case only
391 // matters when we can't understand the whole sequence, otherwise it's also
392 // handled by BranchFolding.cpp.)
393 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
394 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
395 LastInst->eraseFromParent();
396 LastInst = SecondLastInst;
397 LastOpc = LastInst->getOpcode();
398 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
399 assert(!isUncondBranchOpcode(LastOpc) &&
400 "unreachable unconditional branches removed above");
401
402 if (isCondBranchOpcode(LastOpc)) {
403 // Block ends with fall-through condbranch.
404 parseCondBranch(LastInst, TBB, Cond);
405 return false;
406 }
407 return true; // Can't handle indirect branch.
408 }
409 SecondLastInst = &*I;
410 SecondLastOpc = SecondLastInst->getOpcode();
411 }
412
413 // If there are three terminators, we don't know what sort of block this is.
414 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
415 return true;
416
417 // If the block ends with a B and a Bcc, handle it.
418 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
419 parseCondBranch(SecondLastInst, TBB, Cond);
420 FBB = LastInst->getOperand(0).getMBB();
421 return false;
422 }
423
424 // If the block ends with two unconditional branches, handle it. The second
425 // one is not executed, so remove it.
426 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
427 TBB = SecondLastInst->getOperand(0).getMBB();
428 I = LastInst;
429 if (AllowModify)
430 I->eraseFromParent();
431 return false;
432 }
433
434 // ...likewise if it ends with an indirect branch followed by an unconditional
435 // branch.
436 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
437 I = LastInst;
438 if (AllowModify)
439 I->eraseFromParent();
440 return true;
441 }
442
443 // Otherwise, can't handle this.
444 return true;
445}
446
447bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
448 MachineBranchPredicate &MBP,
449 bool AllowModify) const {
450 // For the moment, handle only a block which ends with a cb(n)zx followed by
451 // a fallthrough. Why this? Because it is a common form.
452 // TODO: Should we handle b.cc?
453
454 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
455 if (I == MBB.end())
456 return true;
457
458 // Skip over SpeculationBarrierEndBB terminators
459 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
460 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
461 --I;
462 }
463
464 if (!isUnpredicatedTerminator(*I))
465 return true;
466
467 // Get the last instruction in the block.
468 MachineInstr *LastInst = &*I;
469 unsigned LastOpc = LastInst->getOpcode();
470 if (!isCondBranchOpcode(LastOpc))
471 return true;
472
473 switch (LastOpc) {
474 default:
475 return true;
476 case AArch64::CBZW:
477 case AArch64::CBZX:
478 case AArch64::CBNZW:
479 case AArch64::CBNZX:
480 break;
481 };
482
483 MBP.TrueDest = LastInst->getOperand(1).getMBB();
484 assert(MBP.TrueDest && "expected!");
485 MBP.FalseDest = MBB.getNextNode();
486
487 MBP.ConditionDef = nullptr;
488 MBP.SingleUseCondition = false;
489
490 MBP.LHS = LastInst->getOperand(0);
491 MBP.RHS = MachineOperand::CreateImm(0);
492 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
493 : MachineBranchPredicate::PRED_EQ;
494 return false;
495}
496
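// Reverse the condition produced by analyzeBranch(), e.g. EQ <-> NE for a
// Bcc condition code, CBZ <-> CBNZ and TBZ <-> TBNZ for the folded forms.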
497bool AArch64InstrInfo::reverseBranchCondition(
498 SmallVectorImpl<MachineOperand> &Cond) const {
499 if (Cond[0].getImm() != -1) {
500 // Regular Bcc
501 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
502 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
503 } else {
504 // Folded compare-and-branch
505 switch (Cond[1].getImm()) {
506 default:
507 llvm_unreachable("Unknown conditional branch!");
508 case AArch64::CBZW:
509 Cond[1].setImm(AArch64::CBNZW);
510 break;
511 case AArch64::CBNZW:
512 Cond[1].setImm(AArch64::CBZW);
513 break;
514 case AArch64::CBZX:
515 Cond[1].setImm(AArch64::CBNZX);
516 break;
517 case AArch64::CBNZX:
518 Cond[1].setImm(AArch64::CBZX);
519 break;
520 case AArch64::TBZW:
521 Cond[1].setImm(AArch64::TBNZW);
522 break;
523 case AArch64::TBNZW:
524 Cond[1].setImm(AArch64::TBZW);
525 break;
526 case AArch64::TBZX:
527 Cond[1].setImm(AArch64::TBNZX);
528 break;
529 case AArch64::TBNZX:
530 Cond[1].setImm(AArch64::TBZX);
531 break;
532 }
533 }
534
535 return false;
536}
537
538unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
539 int *BytesRemoved) const {
540 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
541 if (I == MBB.end())
542 return 0;
543
544 if (!isUncondBranchOpcode(I->getOpcode()) &&
545 !isCondBranchOpcode(I->getOpcode()))
546 return 0;
547
548 // Remove the branch.
549 I->eraseFromParent();
550
551 I = MBB.end();
552
553 if (I == MBB.begin()) {
554 if (BytesRemoved)
555 *BytesRemoved = 4;
556 return 1;
557 }
558 --I;
559 if (!isCondBranchOpcode(I->getOpcode())) {
560 if (BytesRemoved)
561 *BytesRemoved = 4;
562 return 1;
563 }
564
565 // Remove the branch.
566 I->eraseFromParent();
567 if (BytesRemoved)
568 *BytesRemoved = 8;
569
570 return 2;
571}
572
573void AArch64InstrInfo::instantiateCondBranch(
574 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
575 ArrayRef<MachineOperand> Cond) const {
576 if (Cond[0].getImm() != -1) {
577 // Regular Bcc
578 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
579 } else {
580 // Folded compare-and-branch
581 // Note that we use addOperand instead of addReg to keep the flags.
582 const MachineInstrBuilder MIB =
583 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
584 if (Cond.size() > 3)
585 MIB.addImm(Cond[3].getImm());
586 MIB.addMBB(TBB);
587 }
588}
589
590unsigned AArch64InstrInfo::insertBranch(
591 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
592 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
593 // Shouldn't be a fall through.
594 assert(TBB && "insertBranch must not be told to insert a fallthrough");
595
596 if (!FBB) {
597 if (Cond.empty()) // Unconditional branch?
598 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
599 else
600 instantiateCondBranch(MBB, DL, TBB, Cond);
601
602 if (BytesAdded)
603 *BytesAdded = 4;
604
605 return 1;
606 }
607
608 // Two-way conditional branch.
609 instantiateCondBranch(MBB, DL, TBB, Cond);
610 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
611
612 if (BytesAdded)
613 *BytesAdded = 8;
614
615 return 2;
616}
617
618// Find the original register that VReg is copied from.
619static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
620 while (Register::isVirtualRegister(VReg)) {
621 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
622 if (!DefMI->isFullCopy())
623 return VReg;
624 VReg = DefMI->getOperand(1).getReg();
625 }
626 return VReg;
627}
628
629// Determine if VReg is defined by an instruction that can be folded into a
630// csel instruction. If so, return the folded opcode, and the replacement
631// register.
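// For example, if VReg is defined by "%1 = ADDWri %2, 1, 0", a select of %1
// can instead be emitted as CSINCWr using %2, leaving the ADD dead.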
632static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
633 unsigned *NewVReg = nullptr) {
634 VReg = removeCopies(MRI, VReg);
635 if (!Register::isVirtualRegister(VReg))
636 return 0;
637
638 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
639 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
640 unsigned Opc = 0;
641 unsigned SrcOpNum = 0;
642 switch (DefMI->getOpcode()) {
643 case AArch64::ADDSXri:
644 case AArch64::ADDSWri:
645 // if NZCV is used, do not fold.
646 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
647 true) == -1)
648 return 0;
649 // fall-through to ADDXri and ADDWri.
650 [[fallthrough]];
651 case AArch64::ADDXri:
652 case AArch64::ADDWri:
653 // add x, 1 -> csinc.
654 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
655 DefMI->getOperand(3).getImm() != 0)
656 return 0;
657 SrcOpNum = 1;
658 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
659 break;
660
661 case AArch64::ORNXrr:
662 case AArch64::ORNWrr: {
663 // not x -> csinv, represented as orn dst, xzr, src.
664 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
665 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
666 return 0;
667 SrcOpNum = 2;
668 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
669 break;
670 }
671
672 case AArch64::SUBSXrr:
673 case AArch64::SUBSWrr:
674 // if NZCV is used, do not fold.
675 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
676 true) == -1)
677 return 0;
678 // fall-through to SUBXrr and SUBWrr.
679 [[fallthrough]];
680 case AArch64::SUBXrr:
681 case AArch64::SUBWrr: {
682 // neg x -> csneg, represented as sub dst, xzr, src.
683 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
684 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
685 return 0;
686 SrcOpNum = 2;
687 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
688 break;
689 }
690 default:
691 return 0;
692 }
693 assert(Opc && SrcOpNum && "Missing parameters");
694
695 if (NewVReg)
696 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
697 return Opc;
698}
699
700bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
701 ArrayRef<MachineOperand> Cond,
702 Register DstReg, Register TrueReg,
703 Register FalseReg, int &CondCycles,
704 int &TrueCycles,
705 int &FalseCycles) const {
706 // Check register classes.
707 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
708 const TargetRegisterClass *RC =
709 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
710 if (!RC)
711 return false;
712
713 // Also need to check the dest regclass, in case we're trying to optimize
714 // something like:
715 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
716 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
717 return false;
718
719 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
720 unsigned ExtraCondLat = Cond.size() != 1;
721
722 // GPRs are handled by csel.
723 // FIXME: Fold in x+1, -x, and ~x when applicable.
724 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
725 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
726 // Single-cycle csel, csinc, csinv, and csneg.
727 CondCycles = 1 + ExtraCondLat;
728 TrueCycles = FalseCycles = 1;
729 if (canFoldIntoCSel(MRI, TrueReg))
730 TrueCycles = 0;
731 else if (canFoldIntoCSel(MRI, FalseReg))
732 FalseCycles = 0;
733 return true;
734 }
735
736 // Scalar floating point is handled by fcsel.
737 // FIXME: Form fabs, fmin, and fmax when applicable.
738 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
739 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
740 CondCycles = 5 + ExtraCondLat;
741 TrueCycles = FalseCycles = 2;
742 return true;
743 }
744
745 // Can't do vectors.
746 return false;
747}
748
749void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
750 MachineBasicBlock::iterator I,
751 const DebugLoc &DL, Register DstReg,
752 ArrayRef<MachineOperand> Cond,
753 Register TrueReg, Register FalseReg) const {
754 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
755
756 // Parse the condition code, see parseCondBranch() above.
757 AArch64CC::CondCode CC;
758 switch (Cond.size()) {
759 default:
760 llvm_unreachable("Unknown condition opcode in Cond");
761 case 1: // b.cc
762 CC = AArch64CC::CondCode(Cond[0].getImm());
763 break;
764 case 3: { // cbz/cbnz
765 // We must insert a compare against 0.
766 bool Is64Bit;
767 switch (Cond[1].getImm()) {
768 default:
769 llvm_unreachable("Unknown branch opcode in Cond");
770 case AArch64::CBZW:
771 Is64Bit = false;
772 CC = AArch64CC::EQ;
773 break;
774 case AArch64::CBZX:
775 Is64Bit = true;
776 CC = AArch64CC::EQ;
777 break;
778 case AArch64::CBNZW:
779 Is64Bit = false;
780 CC = AArch64CC::NE;
781 break;
782 case AArch64::CBNZX:
783 Is64Bit = true;
784 CC = AArch64CC::NE;
785 break;
786 }
787 Register SrcReg = Cond[2].getReg();
788 if (Is64Bit) {
789 // cmp reg, #0 is actually subs xzr, reg, #0.
790 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
791 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
792 .addReg(SrcReg)
793 .addImm(0)
794 .addImm(0);
795 } else {
796 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
797 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
798 .addReg(SrcReg)
799 .addImm(0)
800 .addImm(0);
801 }
802 break;
803 }
804 case 4: { // tbz/tbnz
805 // We must insert a tst instruction.
806 switch (Cond[1].getImm()) {
807 default:
808 llvm_unreachable("Unknown branch opcode in Cond");
809 case AArch64::TBZW:
810 case AArch64::TBZX:
811 CC = AArch64CC::EQ;
812 break;
813 case AArch64::TBNZW:
814 case AArch64::TBNZX:
815 CC = AArch64CC::NE;
816 break;
817 }
818 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
819 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
820 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
821 .addReg(Cond[2].getReg())
822 .addImm(
823 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
824 else
825 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
826 .addReg(Cond[2].getReg())
827 .addImm(
828 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
829 break;
830 }
831 }
832
833 unsigned Opc = 0;
834 const TargetRegisterClass *RC = nullptr;
835 bool TryFold = false;
836 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
837 RC = &AArch64::GPR64RegClass;
838 Opc = AArch64::CSELXr;
839 TryFold = true;
840 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
841 RC = &AArch64::GPR32RegClass;
842 Opc = AArch64::CSELWr;
843 TryFold = true;
844 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
845 RC = &AArch64::FPR64RegClass;
846 Opc = AArch64::FCSELDrrr;
847 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
848 RC = &AArch64::FPR32RegClass;
849 Opc = AArch64::FCSELSrrr;
850 }
851 assert(RC && "Unsupported regclass");
852
853 // Try folding simple instructions into the csel.
854 if (TryFold) {
855 unsigned NewVReg = 0;
856 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
857 if (FoldedOpc) {
858 // The folded opcodes csinc, csinv and csneg apply the operation to
859 // FalseReg, so we need to invert the condition.
860 CC = AArch64CC::getInvertedCondCode(CC);
861 TrueReg = FalseReg;
862 } else
863 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
864
865 // Fold the operation. Leave any dead instructions for DCE to clean up.
866 if (FoldedOpc) {
867 FalseReg = NewVReg;
868 Opc = FoldedOpc;
869 // This extends the live range of NewVReg.
870 MRI.clearKillFlags(NewVReg);
871 }
872 }
873
874 // Pull all virtual registers into the appropriate class.
875 MRI.constrainRegClass(TrueReg, RC);
876 MRI.constrainRegClass(FalseReg, RC);
877
878 // Insert the csel.
879 BuildMI(MBB, I, DL, get(Opc), DstReg)
880 .addReg(TrueReg)
881 .addReg(FalseReg)
882 .addImm(CC);
883}
884
885// Return true if Imm can be loaded into a register by a "cheap" sequence of
886// instructions. For now, "cheap" means at most two instructions.
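// For example, a 64-bit immediate that expands to a single MOVZ/MOVN/ORR or
// to a MOVZ+MOVK pair is cheap; one that needs three or more instructions is
// not.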
887static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
888 if (BitSize == 32)
889 return true;
890
891 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
892 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
893 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
894 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
895
896 return Is.size() <= 2;
897}
898
899// FIXME: this implementation should be micro-architecture dependent, so a
900// micro-architecture target hook should be introduced here in future.
901bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
902 if (Subtarget.hasExynosCheapAsMoveHandling()) {
903 if (isExynosCheapAsMove(MI))
904 return true;
905 return MI.isAsCheapAsAMove();
906 }
907
908 switch (MI.getOpcode()) {
909 default:
910 return MI.isAsCheapAsAMove();
911
912 case AArch64::ADDWrs:
913 case AArch64::ADDXrs:
914 case AArch64::SUBWrs:
915 case AArch64::SUBXrs:
916 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
917
918 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
919 // ORRXri, it is as cheap as MOV.
920 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
921 case AArch64::MOVi32imm:
922 return isCheapImmediate(MI, 32);
923 case AArch64::MOVi64imm:
924 return isCheapImmediate(MI, 64);
925 }
926}
927
928bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
929 switch (MI.getOpcode()) {
930 default:
931 return false;
932
933 case AArch64::ADDWrs:
934 case AArch64::ADDXrs:
935 case AArch64::ADDSWrs:
936 case AArch64::ADDSXrs: {
937 unsigned Imm = MI.getOperand(3).getImm();
938 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
939 if (ShiftVal == 0)
940 return true;
941 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
942 }
943
944 case AArch64::ADDWrx:
945 case AArch64::ADDXrx:
946 case AArch64::ADDXrx64:
947 case AArch64::ADDSWrx:
948 case AArch64::ADDSXrx:
949 case AArch64::ADDSXrx64: {
950 unsigned Imm = MI.getOperand(3).getImm();
951 switch (AArch64_AM::getArithExtendType(Imm)) {
952 default:
953 return false;
954 case AArch64_AM::UXTB:
955 case AArch64_AM::UXTH:
956 case AArch64_AM::UXTW:
957 case AArch64_AM::UXTX:
958 return AArch64_AM::getArithShiftValue(Imm) <= 4;
959 }
960 }
961
962 case AArch64::SUBWrs:
963 case AArch64::SUBSWrs: {
964 unsigned Imm = MI.getOperand(3).getImm();
965 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
966 return ShiftVal == 0 ||
967 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
968 }
969
970 case AArch64::SUBXrs:
971 case AArch64::SUBSXrs: {
972 unsigned Imm = MI.getOperand(3).getImm();
973 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
974 return ShiftVal == 0 ||
975 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
976 }
977
978 case AArch64::SUBWrx:
979 case AArch64::SUBXrx:
980 case AArch64::SUBXrx64:
981 case AArch64::SUBSWrx:
982 case AArch64::SUBSXrx:
983 case AArch64::SUBSXrx64: {
984 unsigned Imm = MI.getOperand(3).getImm();
985 switch (AArch64_AM::getArithExtendType(Imm)) {
986 default:
987 return false;
988 case AArch64_AM::UXTB:
989 case AArch64_AM::UXTH:
990 case AArch64_AM::UXTW:
991 case AArch64_AM::UXTX:
992 return AArch64_AM::getArithShiftValue(Imm) == 0;
993 }
994 }
995
996 case AArch64::LDRBBroW:
997 case AArch64::LDRBBroX:
998 case AArch64::LDRBroW:
999 case AArch64::LDRBroX:
1000 case AArch64::LDRDroW:
1001 case AArch64::LDRDroX:
1002 case AArch64::LDRHHroW:
1003 case AArch64::LDRHHroX:
1004 case AArch64::LDRHroW:
1005 case AArch64::LDRHroX:
1006 case AArch64::LDRQroW:
1007 case AArch64::LDRQroX:
1008 case AArch64::LDRSBWroW:
1009 case AArch64::LDRSBWroX:
1010 case AArch64::LDRSBXroW:
1011 case AArch64::LDRSBXroX:
1012 case AArch64::LDRSHWroW:
1013 case AArch64::LDRSHWroX:
1014 case AArch64::LDRSHXroW:
1015 case AArch64::LDRSHXroX:
1016 case AArch64::LDRSWroW:
1017 case AArch64::LDRSWroX:
1018 case AArch64::LDRSroW:
1019 case AArch64::LDRSroX:
1020 case AArch64::LDRWroW:
1021 case AArch64::LDRWroX:
1022 case AArch64::LDRXroW:
1023 case AArch64::LDRXroX:
1024 case AArch64::PRFMroW:
1025 case AArch64::PRFMroX:
1026 case AArch64::STRBBroW:
1027 case AArch64::STRBBroX:
1028 case AArch64::STRBroW:
1029 case AArch64::STRBroX:
1030 case AArch64::STRDroW:
1031 case AArch64::STRDroX:
1032 case AArch64::STRHHroW:
1033 case AArch64::STRHHroX:
1034 case AArch64::STRHroW:
1035 case AArch64::STRHroX:
1036 case AArch64::STRQroW:
1037 case AArch64::STRQroX:
1038 case AArch64::STRSroW:
1039 case AArch64::STRSroX:
1040 case AArch64::STRWroW:
1041 case AArch64::STRWroX:
1042 case AArch64::STRXroW:
1043 case AArch64::STRXroX: {
1044 unsigned IsSigned = MI.getOperand(3).getImm();
1045 return !IsSigned;
1046 }
1047 }
1048}
1049
1050bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1051 unsigned Opc = MI.getOpcode();
1052 switch (Opc) {
1053 default:
1054 return false;
1055 case AArch64::SEH_StackAlloc:
1056 case AArch64::SEH_SaveFPLR:
1057 case AArch64::SEH_SaveFPLR_X:
1058 case AArch64::SEH_SaveReg:
1059 case AArch64::SEH_SaveReg_X:
1060 case AArch64::SEH_SaveRegP:
1061 case AArch64::SEH_SaveRegP_X:
1062 case AArch64::SEH_SaveFReg:
1063 case AArch64::SEH_SaveFReg_X:
1064 case AArch64::SEH_SaveFRegP:
1065 case AArch64::SEH_SaveFRegP_X:
1066 case AArch64::SEH_SetFP:
1067 case AArch64::SEH_AddFP:
1068 case AArch64::SEH_Nop:
1069 case AArch64::SEH_PrologEnd:
1070 case AArch64::SEH_EpilogStart:
1071 case AArch64::SEH_EpilogEnd:
1072 case AArch64::SEH_PACSignLR:
1073 case AArch64::SEH_SaveAnyRegQP:
1074 case AArch64::SEH_SaveAnyRegQPX:
1075 return true;
1076 }
1077}
1078
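// A 32-to-64-bit SBFMXri/UBFMXri (sxtw/uxtw) copies its source into the low
// 32 bits of the destination, so the coalescer may treat it as a copy to the
// sub_32 subregister; that is the only form recognized below.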
1079bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1080 Register &SrcReg, Register &DstReg,
1081 unsigned &SubIdx) const {
1082 switch (MI.getOpcode()) {
1083 default:
1084 return false;
1085 case AArch64::SBFMXri: // aka sxtw
1086 case AArch64::UBFMXri: // aka uxtw
1087 // Check for the 32 -> 64 bit extension case, these instructions can do
1088 // much more.
1089 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1090 return false;
1091 // This is a signed or unsigned 32 -> 64 bit extension.
1092 SrcReg = MI.getOperand(1).getReg();
1093 DstReg = MI.getOperand(0).getReg();
1094 SubIdx = AArch64::sub_32;
1095 return true;
1096 }
1097}
1098
1099bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1100 const MachineInstr &MIa, const MachineInstr &MIb) const {
1101 const TargetRegisterInfo *TRI = &getRegisterInfo();
1102 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1103 int64_t OffsetA = 0, OffsetB = 0;
1104 TypeSize WidthA(0, false), WidthB(0, false);
1105 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1106
1107 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1108 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1109
1110 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1111 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1112 return false;
1113
1114 // Retrieve the base, offset from the base and width. Width
1115 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1116 // bases are identical, and the offset of a lower memory access +
1117 // the width doesn't overlap the offset of a higher memory access,
1118 // then the memory accesses are different.
1119 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1120 // are assumed to have the same scale (vscale).
1121 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1122 WidthA, TRI) &&
1123 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1124 WidthB, TRI)) {
1125 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1126 OffsetAIsScalable == OffsetBIsScalable) {
1127 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1128 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1129 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1130 if (LowWidth.isScalable() == OffsetAIsScalable &&
1131 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1132 return true;
1133 }
1134 }
1135 return false;
1136}
1137
1138bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1139 const MachineBasicBlock *MBB,
1140 const MachineFunction &MF) const {
1141 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1142 return true;
1143
1144 // Do not move an instruction that can be recognized as a branch target.
1145 if (hasBTISemantics(MI))
1146 return true;
1147
1148 switch (MI.getOpcode()) {
1149 case AArch64::HINT:
1150 // CSDB hints are scheduling barriers.
1151 if (MI.getOperand(0).getImm() == 0x14)
1152 return true;
1153 break;
1154 case AArch64::DSB:
1155 case AArch64::ISB:
1156 // DSB and ISB also are scheduling barriers.
1157 return true;
1158 case AArch64::MSRpstatesvcrImm1:
1159 // SMSTART and SMSTOP are also scheduling barriers.
1160 return true;
1161 default:;
1162 }
1163 if (isSEHInstruction(MI))
1164 return true;
1165 auto Next = std::next(MI.getIterator());
1166 return Next != MBB->end() && Next->isCFIInstruction();
1167}
1168
1169/// analyzeCompare - For a comparison instruction, return the source registers
1170/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1171/// Return true if the comparison instruction can be analyzed.
1172bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1173 Register &SrcReg2, int64_t &CmpMask,
1174 int64_t &CmpValue) const {
1175 // The first operand can be a frame index where we'd normally expect a
1176 // register.
1177 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1178 if (!MI.getOperand(1).isReg())
1179 return false;
1180
1181 switch (MI.getOpcode()) {
1182 default:
1183 break;
1184 case AArch64::PTEST_PP:
1185 case AArch64::PTEST_PP_ANY:
1186 SrcReg = MI.getOperand(0).getReg();
1187 SrcReg2 = MI.getOperand(1).getReg();
1188 // Not sure about the mask and value for now...
1189 CmpMask = ~0;
1190 CmpValue = 0;
1191 return true;
1192 case AArch64::SUBSWrr:
1193 case AArch64::SUBSWrs:
1194 case AArch64::SUBSWrx:
1195 case AArch64::SUBSXrr:
1196 case AArch64::SUBSXrs:
1197 case AArch64::SUBSXrx:
1198 case AArch64::ADDSWrr:
1199 case AArch64::ADDSWrs:
1200 case AArch64::ADDSWrx:
1201 case AArch64::ADDSXrr:
1202 case AArch64::ADDSXrs:
1203 case AArch64::ADDSXrx:
1204 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1205 SrcReg = MI.getOperand(1).getReg();
1206 SrcReg2 = MI.getOperand(2).getReg();
1207 CmpMask = ~0;
1208 CmpValue = 0;
1209 return true;
1210 case AArch64::SUBSWri:
1211 case AArch64::ADDSWri:
1212 case AArch64::SUBSXri:
1213 case AArch64::ADDSXri:
1214 SrcReg = MI.getOperand(1).getReg();
1215 SrcReg2 = 0;
1216 CmpMask = ~0;
1217 CmpValue = MI.getOperand(2).getImm();
1218 return true;
1219 case AArch64::ANDSWri:
1220 case AArch64::ANDSXri:
1221 // ANDS does not use the same encoding scheme as the others xxxS
1222 // instructions.
1223 SrcReg = MI.getOperand(1).getReg();
1224 SrcReg2 = 0;
1225 CmpMask = ~0;
1226 CmpValue = AArch64_AM::decodeLogicalImmediate(
1227 MI.getOperand(2).getImm(),
1228 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1229 return true;
1230 }
1231
1232 return false;
1233}
1234
1235static bool UpdateOperandRegClass(MachineInstr &Instr) {
1236 MachineBasicBlock *MBB = Instr.getParent();
1237 assert(MBB && "Can't get MachineBasicBlock here");
1238 MachineFunction *MF = MBB->getParent();
1239 assert(MF && "Can't get MachineFunction here");
1240 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1241 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1242 MachineRegisterInfo *MRI = &MF->getRegInfo();
1243
1244 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1245 ++OpIdx) {
1246 MachineOperand &MO = Instr.getOperand(OpIdx);
1247 const TargetRegisterClass *OpRegCstraints =
1248 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1249
1250 // If there's no constraint, there's nothing to do.
1251 if (!OpRegCstraints)
1252 continue;
1253 // If the operand is a frame index, there's nothing to do here.
1254 // A frame index operand will resolve correctly during PEI.
1255 if (MO.isFI())
1256 continue;
1257
1258 assert(MO.isReg() &&
1259 "Operand has register constraints without being a register!");
1260
1261 Register Reg = MO.getReg();
1262 if (Reg.isPhysical()) {
1263 if (!OpRegCstraints->contains(Reg))
1264 return false;
1265 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1266 !MRI->constrainRegClass(Reg, OpRegCstraints))
1267 return false;
1268 }
1269
1270 return true;
1271}
1272
1273/// Return the opcode that does not set flags when possible - otherwise
1274/// return the original opcode. The caller is responsible to do the actual
1275/// substitution and legality checking.
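/// For example ADDSWrr maps to ADDWrr, while ADDSWri that writes WZR is kept
/// as-is because the non-flag-setting encoding would turn the zero register
/// operand into WSP.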
1276unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1277 // Don't convert all compare instructions, because for some the zero register
1278 // encoding becomes the sp register.
1279 bool MIDefinesZeroReg = false;
1280 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1281 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1282 MIDefinesZeroReg = true;
1283
1284 switch (MI.getOpcode()) {
1285 default:
1286 return MI.getOpcode();
1287 case AArch64::ADDSWrr:
1288 return AArch64::ADDWrr;
1289 case AArch64::ADDSWri:
1290 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1291 case AArch64::ADDSWrs:
1292 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1293 case AArch64::ADDSWrx:
1294 return AArch64::ADDWrx;
1295 case AArch64::ADDSXrr:
1296 return AArch64::ADDXrr;
1297 case AArch64::ADDSXri:
1298 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1299 case AArch64::ADDSXrs:
1300 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1301 case AArch64::ADDSXrx:
1302 return AArch64::ADDXrx;
1303 case AArch64::SUBSWrr:
1304 return AArch64::SUBWrr;
1305 case AArch64::SUBSWri:
1306 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1307 case AArch64::SUBSWrs:
1308 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1309 case AArch64::SUBSWrx:
1310 return AArch64::SUBWrx;
1311 case AArch64::SUBSXrr:
1312 return AArch64::SUBXrr;
1313 case AArch64::SUBSXri:
1314 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1315 case AArch64::SUBSXrs:
1316 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1317 case AArch64::SUBSXrx:
1318 return AArch64::SUBXrx;
1319 }
1320}
1321
1322enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1323
1324/// True when condition flags are accessed (either by writing or reading)
1325/// on the instruction trace starting at From and ending at To.
1326///
1327/// Note: If From and To are from different blocks it's assumed CC are accessed
1328/// on the path.
1329static bool areCFlagsAccessedBetweenInstrs(
1330 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1331 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1332 // Early exit if To is at the beginning of the BB.
1333 if (To == To->getParent()->begin())
1334 return true;
1335
1336 // Check whether the instructions are in the same basic block
1337 // If not, assume the condition flags might get modified somewhere.
1338 if (To->getParent() != From->getParent())
1339 return true;
1340
1341 // From must be above To.
1342 assert(std::any_of(
1343 ++To.getReverse(), To->getParent()->rend(),
1344 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1345
1346 // We iterate backward starting at \p To until we hit \p From.
1347 for (const MachineInstr &Instr :
1348 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1349 if (((AccessToCheck & AK_Write) &&
1350 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1351 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1352 return true;
1353 }
1354 return false;
1355}
1356
1357/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1358/// operation which could set the flags in an identical manner
1359bool AArch64InstrInfo::optimizePTestInstr(
1360 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1361 const MachineRegisterInfo *MRI) const {
1362 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1363 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1364 auto NewOp = Pred->getOpcode();
1365 bool OpChanged = false;
1366
1367 unsigned MaskOpcode = Mask->getOpcode();
1368 unsigned PredOpcode = Pred->getOpcode();
1369 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1370 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1371
1372 if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike) &&
1373 getElementSizeForOpcode(MaskOpcode) ==
1374 getElementSizeForOpcode(PredOpcode) &&
1375 Mask->getOperand(1).getImm() == 31) {
1376 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1377 // redundant since WHILE performs an implicit PTEST with an all active
1378 // mask. Must be an all active predicate of matching element size.
1379
1380 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1381 // PTEST_LIKE instruction uses the same all active mask and the element
1382 // size matches. If the PTEST has a condition of any then it is always
1383 // redundant.
1384 if (PredIsPTestLike) {
1385 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1386 if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
1387 return false;
1388 }
1389
1390 // Fallthrough to simply remove the PTEST.
1391 } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
1392 PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
1393 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1394 // instruction that sets the flags as PTEST would. This is only valid when
1395 // the condition is any.
1396
1397 // Fallthrough to simply remove the PTEST.
1398 } else if (PredIsPTestLike) {
1399 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1400 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1401 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1402 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1403 // performed by the compare could consider fewer lanes for these element
1404 // sizes.
1405 //
1406 // For example, consider
1407 //
1408 // ptrue p0.b ; P0=1111-1111-1111-1111
1409 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1410 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1411 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1412 // ; ^ last active
1413 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1414 // ; ^ last active
1415 //
1416 // where the compare generates a canonical all active 32-bit predicate
1417 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1418 // active flag, whereas the PTEST instruction with the same mask doesn't.
1419 // For PTEST_ANY this doesn't apply as the flags in this case would be
1420 // identical regardless of element size.
1421 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1422 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1423 if ((Mask != PTestLikeMask) ||
1424 (PredElementSize != AArch64::ElementSizeB &&
1425 PTest->getOpcode() != AArch64::PTEST_PP_ANY))
1426 return false;
1427
1428 // Fallthrough to simply remove the PTEST.
1429 } else {
1430 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1431 // opcode so the PTEST becomes redundant.
1432 switch (PredOpcode) {
1433 case AArch64::AND_PPzPP:
1434 case AArch64::BIC_PPzPP:
1435 case AArch64::EOR_PPzPP:
1436 case AArch64::NAND_PPzPP:
1437 case AArch64::NOR_PPzPP:
1438 case AArch64::ORN_PPzPP:
1439 case AArch64::ORR_PPzPP:
1440 case AArch64::BRKA_PPzP:
1441 case AArch64::BRKPA_PPzPP:
1442 case AArch64::BRKB_PPzP:
1443 case AArch64::BRKPB_PPzPP:
1444 case AArch64::RDFFR_PPz: {
1445 // Check to see if our mask is the same. If not the resulting flag bits
1446 // may be different and we can't remove the ptest.
1447 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1448 if (Mask != PredMask)
1449 return false;
1450 break;
1451 }
1452 case AArch64::BRKN_PPzP: {
1453 // BRKN uses an all active implicit mask to set flags unlike the other
1454 // flag-setting instructions.
1455 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1456 if ((MaskOpcode != AArch64::PTRUE_B) ||
1457 (Mask->getOperand(1).getImm() != 31))
1458 return false;
1459 break;
1460 }
1461 case AArch64::PTRUE_B:
1462 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1463 break;
1464 default:
1465 // Bail out if we don't recognize the input
1466 return false;
1467 }
1468
1469 NewOp = convertToFlagSettingOpc(PredOpcode);
1470 OpChanged = true;
1471 }
1472
1473 const TargetRegisterInfo *TRI = &getRegisterInfo();
1474
1475 // If another instruction between Pred and PTest accesses flags, don't remove
1476 // the ptest or update the earlier instruction to modify them.
1477 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1478 return false;
1479
1480 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1481 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1482 // operand to be replaced with an equivalent instruction that also sets the
1483 // flags.
1484 Pred->setDesc(get(NewOp));
1485 PTest->eraseFromParent();
1486 if (OpChanged) {
1487 bool succeeded = UpdateOperandRegClass(*Pred);
1488 (void)succeeded;
1489 assert(succeeded && "Operands have incompatible register classes!");
1490 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1491 }
1492
1493 // Ensure that the flags def is live.
1494 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1495 unsigned i = 0, e = Pred->getNumOperands();
1496 for (; i != e; ++i) {
1497 MachineOperand &MO = Pred->getOperand(i);
1498 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1499 MO.setIsDead(false);
1500 break;
1501 }
1502 }
1503 }
1504 return true;
1505}
1506
1507/// Try to optimize a compare instruction. A compare instruction is an
1508/// instruction which produces AArch64::NZCV. It is truly a compare
1509/// instruction
1510/// only when there are no uses of its destination register.
1511///
1512/// The following steps are tried in order:
1513/// 1. Convert CmpInstr into an unconditional version.
1514/// 2. Remove CmpInstr if above there is an instruction producing a needed
1515/// condition code or an instruction which can be converted into such an
1516/// instruction.
1517/// Only comparison with zero is supported.
1518bool AArch64InstrInfo::optimizeCompareInstr(
1519 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1520 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1521 assert(CmpInstr.getParent());
1522 assert(MRI);
1523
1524 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1525 int DeadNZCVIdx =
1526 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1527 if (DeadNZCVIdx != -1) {
1528 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1529 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1530 CmpInstr.eraseFromParent();
1531 return true;
1532 }
1533 unsigned Opc = CmpInstr.getOpcode();
1534 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1535 if (NewOpc == Opc)
1536 return false;
1537 const MCInstrDesc &MCID = get(NewOpc);
1538 CmpInstr.setDesc(MCID);
1539 CmpInstr.removeOperand(DeadNZCVIdx);
1540 bool succeeded = UpdateOperandRegClass(CmpInstr);
1541 (void)succeeded;
1542 assert(succeeded && "Some operands reg class are incompatible!");
1543 return true;
1544 }
1545
1546 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1547 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1548 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1549
1550 if (SrcReg2 != 0)
1551 return false;
1552
1553 // CmpInstr is a Compare instruction if destination register is not used.
1554 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1555 return false;
1556
1557 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1558 return true;
1559 return (CmpValue == 0 || CmpValue == 1) &&
1560 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1561}
1562
1563/// Get opcode of S version of Instr.
1564/// If Instr is S version its opcode is returned.
1565/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1566/// or we are not interested in it.
1567static unsigned sForm(MachineInstr &Instr) {
1568 switch (Instr.getOpcode()) {
1569 default:
1570 return AArch64::INSTRUCTION_LIST_END;
1571
1572 case AArch64::ADDSWrr:
1573 case AArch64::ADDSWri:
1574 case AArch64::ADDSXrr:
1575 case AArch64::ADDSXri:
1576 case AArch64::SUBSWrr:
1577 case AArch64::SUBSWri:
1578 case AArch64::SUBSXrr:
1579 case AArch64::SUBSXri:
1580 return Instr.getOpcode();
1581
1582 case AArch64::ADDWrr:
1583 return AArch64::ADDSWrr;
1584 case AArch64::ADDWri:
1585 return AArch64::ADDSWri;
1586 case AArch64::ADDXrr:
1587 return AArch64::ADDSXrr;
1588 case AArch64::ADDXri:
1589 return AArch64::ADDSXri;
1590 case AArch64::ADCWr:
1591 return AArch64::ADCSWr;
1592 case AArch64::ADCXr:
1593 return AArch64::ADCSXr;
1594 case AArch64::SUBWrr:
1595 return AArch64::SUBSWrr;
1596 case AArch64::SUBWri:
1597 return AArch64::SUBSWri;
1598 case AArch64::SUBXrr:
1599 return AArch64::SUBSXrr;
1600 case AArch64::SUBXri:
1601 return AArch64::SUBSXri;
1602 case AArch64::SBCWr:
1603 return AArch64::SBCSWr;
1604 case AArch64::SBCXr:
1605 return AArch64::SBCSXr;
1606 case AArch64::ANDWri:
1607 return AArch64::ANDSWri;
1608 case AArch64::ANDXri:
1609 return AArch64::ANDSXri;
1610 }
1611}
1612
1613/// Check if AArch64::NZCV should be alive in successors of MBB.
1614static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1615 for (auto *BB : MBB->successors())
1616 if (BB->isLiveIn(AArch64::NZCV))
1617 return true;
1618 return false;
1619}
1620
1621/// \returns The condition code operand index for \p Instr if it is a branch
1622/// or select and -1 otherwise.
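/// For Bcc the condition immediate sits two operands before the implicit
/// NZCV use; for the csel/fcsel family it sits immediately before it, hence
/// the Idx - 2 / Idx - 1 adjustments below.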
1623static int
1624findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1625 switch (Instr.getOpcode()) {
1626 default:
1627 return -1;
1628
1629 case AArch64::Bcc: {
1630 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1631 assert(Idx >= 2);
1632 return Idx - 2;
1633 }
1634
1635 case AArch64::CSINVWr:
1636 case AArch64::CSINVXr:
1637 case AArch64::CSINCWr:
1638 case AArch64::CSINCXr:
1639 case AArch64::CSELWr:
1640 case AArch64::CSELXr:
1641 case AArch64::CSNEGWr:
1642 case AArch64::CSNEGXr:
1643 case AArch64::FCSELSrrr:
1644 case AArch64::FCSELDrrr: {
1645 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1646 assert(Idx >= 1);
1647 return Idx - 1;
1648 }
1649 }
1650}
1651
1652/// Find a condition code used by the instruction.
1653/// Returns AArch64CC::Invalid if either the instruction does not use condition
1654/// codes or we don't optimize CmpInstr in the presence of such instructions.
1655static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1656 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1657 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1658 Instr.getOperand(CCIdx).getImm())
1659 : AArch64CC::Invalid;
1660}
1661
1663static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1664 UsedNZCV UsedFlags;
1665 switch (CC) {
1666 default:
1667 break;
1668
1669 case AArch64CC::EQ: // Z set
1670 case AArch64CC::NE: // Z clear
1671 UsedFlags.Z = true;
1672 break;
1673
1674 case AArch64CC::HI: // Z clear and C set
1675 case AArch64CC::LS: // Z set or C clear
1676 UsedFlags.Z = true;
1677 [[fallthrough]];
1678 case AArch64CC::HS: // C set
1679 case AArch64CC::LO: // C clear
1680 UsedFlags.C = true;
1681 break;
1682
1683 case AArch64CC::MI: // N set
1684 case AArch64CC::PL: // N clear
1685 UsedFlags.N = true;
1686 break;
1687
1688 case AArch64CC::VS: // V set
1689 case AArch64CC::VC: // V clear
1690 UsedFlags.V = true;
1691 break;
1692
1693 case AArch64CC::GT: // Z clear, N and V the same
1694 case AArch64CC::LE: // Z set, N and V differ
1695 UsedFlags.Z = true;
1696 [[fallthrough]];
1697 case AArch64CC::GE: // N and V the same
1698 case AArch64CC::LT: // N and V differ
1699 UsedFlags.N = true;
1700 UsedFlags.V = true;
1701 break;
1702 }
1703 return UsedFlags;
1704}
1705
1706/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
1707/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1708/// \returns std::nullopt otherwise.
1709///
1710/// Collect instructions using that flags in \p CCUseInstrs if provided.
1711std::optional<UsedNZCV>
1712llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1713 const TargetRegisterInfo &TRI,
1714 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1715 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1716 if (MI.getParent() != CmpParent)
1717 return std::nullopt;
1718
1719 if (areCFlagsAliveInSuccessors(CmpParent))
1720 return std::nullopt;
1721
1722 UsedNZCV NZCVUsedAfterCmp;
1723 for (MachineInstr &Instr : instructionsWithoutDebug(
1724 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1725 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1726 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1727 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1728 return std::nullopt;
1729 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1730 if (CCUseInstrs)
1731 CCUseInstrs->push_back(&Instr);
1732 }
1733 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1734 break;
1735 }
1736 return NZCVUsedAfterCmp;
1737}
1738
1739static bool isADDSRegImm(unsigned Opcode) {
1740 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1741}
1742
1743static bool isSUBSRegImm(unsigned Opcode) {
1744 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1745}
1746
1747/// Check if CmpInstr can be substituted by MI.
1748///
1749/// CmpInstr can be substituted:
1750/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1751/// - and, MI and CmpInstr are from the same MachineBB
1752/// - and, condition flags are not alive in successors of the CmpInstr parent
1753/// - and, if MI opcode is the S form there must be no defs of flags between
1754/// MI and CmpInstr
1755/// or if MI opcode is not the S form there must be neither defs of flags
1756/// nor uses of flags between MI and CmpInstr.
1757/// - and, if C/V flags are not used after CmpInstr
1758/// or if N flag is used but MI produces poison value if signed overflow
1759/// occurs.
1760static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1761 const TargetRegisterInfo &TRI) {
1762 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1763 // that may or may not set flags.
1764 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1765
1766 const unsigned CmpOpcode = CmpInstr.getOpcode();
1767 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1768 return false;
1769
1770 assert((CmpInstr.getOperand(2).isImm() &&
1771 CmpInstr.getOperand(2).getImm() == 0) &&
1772 "Caller guarantees that CmpInstr compares with constant 0");
1773
1774 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1775 if (!NZVCUsed || NZVCUsed->C)
1776 return false;
1777
1778 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1779 // '%vreg = add ...' or '%vreg = sub ...'.
1780 // Condition flag V is used to indicate signed overflow.
1781 // 1) MI and CmpInstr set N and V to the same value.
1782 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1783 // signed overflow occurs, so CmpInstr could still be simplified away.
1784 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1785 return false;
1786
1787 AccessKind AccessToCheck = AK_Write;
1788 if (sForm(MI) != MI.getOpcode())
1789 AccessToCheck = AK_All;
1790 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1791}
1792
1793/// Substitute an instruction comparing to zero with another instruction
1794/// which produces needed condition flags.
1795///
1796/// Return true on success.
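/// Illustrative example (pseudo-MIR):
///   %3:gpr32 = ADDWrr %1, %2
///   %4:gpr32 = SUBSWri %3, 0, 0, implicit-def $nzcv   ; cmp w3, #0 (%4 unused)
///   Bcc 0, %bb.1, implicit $nzcv                      ; b.eq
/// becomes
///   %3:gpr32 = ADDSWrr %1, %2, implicit-def $nzcv
///   Bcc 0, %bb.1, implicit $nzcv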
1797bool AArch64InstrInfo::substituteCmpToZero(
1798 MachineInstr &CmpInstr, unsigned SrcReg,
1799 const MachineRegisterInfo &MRI) const {
1800 // Get the unique definition of SrcReg.
1801 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1802 if (!MI)
1803 return false;
1804
1805 const TargetRegisterInfo &TRI = getRegisterInfo();
1806
1807 unsigned NewOpc = sForm(*MI);
1808 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1809 return false;
1810
1811 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1812 return false;
1813
1814 // Update the instruction to set NZCV.
1815 MI->setDesc(get(NewOpc));
1816 CmpInstr.eraseFromParent();
1817 bool succeeded = UpdateOperandRegClass(*MI);
1818 (void)succeeded;
1819 assert(succeeded && "Some operands reg class are incompatible!");
1820 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1821 return true;
1822}
1823
1824/// \returns True if \p CmpInstr can be removed.
1825///
1826/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1827/// codes used in \p CCUseInstrs must be inverted.
1828static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1829 int CmpValue, const TargetRegisterInfo &TRI,
1830 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1831 bool &IsInvertCC) {
1832 assert((CmpValue == 0 || CmpValue == 1) &&
1833 "Only comparisons to 0 or 1 considered for removal!");
1834
1835 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1836 unsigned MIOpc = MI.getOpcode();
1837 if (MIOpc == AArch64::CSINCWr) {
1838 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1839 MI.getOperand(2).getReg() != AArch64::WZR)
1840 return false;
1841 } else if (MIOpc == AArch64::CSINCXr) {
1842 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1843 MI.getOperand(2).getReg() != AArch64::XZR)
1844 return false;
1845 } else {
1846 return false;
1847 }
1848 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1849 if (MICC == AArch64CC::Invalid)
1850 return false;
1851
1852 // NZCV needs to be defined
1853 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
1854 return false;
1855
1856 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1857 const unsigned CmpOpcode = CmpInstr.getOpcode();
1858 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1859 if (CmpValue && !IsSubsRegImm)
1860 return false;
1861 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1862 return false;
1863
1864 // MI conditions allowed: eq, ne, mi, pl
1865 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1866 if (MIUsedNZCV.C || MIUsedNZCV.V)
1867 return false;
1868
1869 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1870 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1871 // Condition flags are not used in CmpInstr's basic block successors, and
1872 // only the Z or N flag is allowed to be used after CmpInstr within its block.
1873 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1874 return false;
1875 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1876 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1877 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1878 return false;
1879 // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne.
1880 if (MIUsedNZCV.N && !CmpValue)
1881 return false;
1882
1883 // There must be no defs of flags between MI and CmpInstr
1884 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1885 return false;
1886
1887 // Condition code is inverted in the following cases:
1888 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1889 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1890 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1891 (!CmpValue && MICC == AArch64CC::NE);
1892 return true;
1893}
1894
1895/// Remove comparison in csinc-cmp sequence
1896///
1897/// Examples:
1898/// 1. \code
1899/// csinc w9, wzr, wzr, ne
1900/// cmp w9, #0
1901/// b.eq
1902/// \endcode
1903/// to
1904/// \code
1905/// csinc w9, wzr, wzr, ne
1906/// b.ne
1907/// \endcode
1908///
1909/// 2. \code
1910/// csinc x2, xzr, xzr, mi
1911/// cmp x2, #1
1912/// b.pl
1913/// \endcode
1914/// to
1915/// \code
1916/// csinc x2, xzr, xzr, mi
1917/// b.pl
1918/// \endcode
1919///
1920/// \param CmpInstr comparison instruction
1921/// \return True when comparison removed
1922bool AArch64InstrInfo::removeCmpToZeroOrOne(
1923 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1924 const MachineRegisterInfo &MRI) const {
1925 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1926 if (!MI)
1927 return false;
1928 const TargetRegisterInfo &TRI = getRegisterInfo();
1929 SmallVector<MachineInstr *, 4> CCUseInstrs;
1930 bool IsInvertCC = false;
1931 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1932 IsInvertCC))
1933 return false;
1934 // Make transformation
1935 CmpInstr.eraseFromParent();
1936 if (IsInvertCC) {
1937 // Invert condition codes in CmpInstr CC users
1938 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1940 assert(Idx >= 0 && "Unexpected instruction using CC.");
1941 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1942 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1943 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1944 CCOperand.setImm(CCUse);
1945 }
1946 }
1947 return true;
1948}
1949
1950bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1951 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1952 MI.getOpcode() != AArch64::CATCHRET)
1953 return false;
1954
1955 MachineBasicBlock &MBB = *MI.getParent();
1956 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1957 auto TRI = Subtarget.getRegisterInfo();
1958 DebugLoc DL = MI.getDebugLoc();
1959
1960 if (MI.getOpcode() == AArch64::CATCHRET) {
1961 // Skip to the first instruction before the epilog.
1962 const TargetInstrInfo *TII =
1963 MBB.getParent()->getSubtarget().getInstrInfo();
1964 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1965 MachineBasicBlock::iterator MBBI = MachineBasicBlock::iterator(MI);
1966 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1967 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1968 FirstEpilogSEH != MBB.begin())
1969 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1970 if (FirstEpilogSEH != MBB.begin())
1971 FirstEpilogSEH = std::next(FirstEpilogSEH);
1972 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1973 .addReg(AArch64::X0, RegState::Define)
1974 .addMBB(TargetMBB);
1975 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1976 .addReg(AArch64::X0, RegState::Define)
1977 .addReg(AArch64::X0)
1978 .addMBB(TargetMBB)
1979 .addImm(0);
1980 return true;
1981 }
1982
1983 Register Reg = MI.getOperand(0).getReg();
1984 const Module &M = *MBB.getParent()->getFunction().getParent();
1985 if (M.getStackProtectorGuard() == "sysreg") {
1986 const AArch64SysReg::SysReg *SrcReg =
1987 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
1988 if (!SrcReg)
1989 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
1990
1991 // mrs xN, sysreg
1992 BuildMI(MBB, MI, DL, get(AArch64::MRS))
1993 .addDef(Reg)
1994 .addImm(SrcReg->Encoding);
1995 int Offset = M.getStackProtectorGuardOffset();
1996 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
1997 // ldr xN, [xN, #offset]
1998 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1999 .addDef(Reg)
2000 .addUse(Reg, RegState::Kill)
2001 .addImm(Offset / 8);
2002 } else if (Offset >= -256 && Offset <= 255) {
2003 // ldur xN, [xN, #offset]
2004 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2005 .addDef(Reg)
2006 .addUse(Reg, RegState::Kill)
2007 .addImm(Offset);
2008 } else if (Offset >= -4095 && Offset <= 4095) {
2009 if (Offset > 0) {
2010 // add xN, xN, #offset
2011 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2012 .addDef(Reg)
2013 .addUse(Reg, RegState::Kill)
2014 .addImm(Offset)
2015 .addImm(0);
2016 } else {
2017 // sub xN, xN, #offset
2018 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2019 .addDef(Reg)
2020 .addUse(Reg, RegState::Kill)
2021 .addImm(-Offset)
2022 .addImm(0);
2023 }
2024 // ldr xN, [xN]
2025 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2026 .addDef(Reg)
2027 .addUse(Reg, RegState::Kill)
2028 .addImm(0);
2029 } else {
2030 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2031 // than 32760.
2032 // It might be nice to use AArch64::MOVi32imm here, which would get
2033 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2034 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2035 // AArch64FrameLowering might help us find such a scratch register
2036 // though. If we failed to find a scratch register, we could emit a
2037 // stream of add instructions to build up the immediate. Or, we could try
2038 // to insert a AArch64::MOVi32imm before register allocation so that we
2039 // didn't need to scavenge for a scratch register.
2040 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2041 }
2042 MBB.erase(MI);
2043 return true;
2044 }
2045
2046 const GlobalValue *GV =
2047 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2048 const TargetMachine &TM = MBB.getParent()->getTarget();
2049 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2050 const unsigned char MO_NC = AArch64II::MO_NC;
2051
2052 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2053 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2054 .addGlobalAddress(GV, 0, OpFlags);
2055 if (Subtarget.isTargetILP32()) {
2056 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2057 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2058 .addDef(Reg32, RegState::Dead)
2059 .addUse(Reg, RegState::Kill)
2060 .addImm(0)
2061 .addMemOperand(*MI.memoperands_begin())
2062 .addDef(Reg, RegState::Implicit);
2063 } else {
2064 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2065 .addReg(Reg, RegState::Kill)
2066 .addImm(0)
2067 .addMemOperand(*MI.memoperands_begin());
2068 }
2069 } else if (TM.getCodeModel() == CodeModel::Large) {
2070 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2071 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2072 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2073 .addImm(0);
2074 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2075 .addReg(Reg, RegState::Kill)
2076 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2077 .addImm(16);
2078 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2079 .addReg(Reg, RegState::Kill)
2080 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2081 .addImm(32);
2082 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2083 .addReg(Reg, RegState::Kill)
2084 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2085 .addImm(48);
2086 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2087 .addReg(Reg, RegState::Kill)
2088 .addImm(0)
2089 .addMemOperand(*MI.memoperands_begin());
2090 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2091 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2092 .addGlobalAddress(GV, 0, OpFlags);
2093 } else {
2094 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2095 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2096 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2097 if (Subtarget.isTargetILP32()) {
2098 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2099 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2100 .addDef(Reg32, RegState::Dead)
2101 .addUse(Reg, RegState::Kill)
2102 .addGlobalAddress(GV, 0, LoFlags)
2103 .addMemOperand(*MI.memoperands_begin())
2104 .addDef(Reg, RegState::Implicit);
2105 } else {
2106 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2107 .addReg(Reg, RegState::Kill)
2108 .addGlobalAddress(GV, 0, LoFlags)
2109 .addMemOperand(*MI.memoperands_begin());
2110 }
2111 }
2112
2113 MBB.erase(MI);
2114
2115 return true;
2116}
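// Illustrative example (editorial note, not from the upstream file): when the
// stack protector guard is configured as a system register (e.g. via options
// along the lines of -mstack-protector-guard=sysreg
// -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=8), the
// LOAD_STACK_GUARD pseudo above expands to roughly
//
//   mrs  x8, SP_EL0
//   ldr  x8, [x8, #8]
//
// whereas in the default case the guard global (typically __stack_chk_guard)
// is materialized via LOADgot, a MOVZ/MOVK chain (large code model), ADR
// (tiny code model) or an ADRP plus scaled LDR, as selected above.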
2117
2118// Return true if this instruction simply sets its single destination register
2119// to zero. This is equivalent to a register rename of the zero-register.
2120bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2121 switch (MI.getOpcode()) {
2122 default:
2123 break;
2124 case AArch64::MOVZWi:
2125 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2126 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2127 assert(MI.getDesc().getNumOperands() == 3 &&
2128 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2129 return true;
2130 }
2131 break;
2132 case AArch64::ANDWri: // and Rd, Rzr, #imm
2133 return MI.getOperand(1).getReg() == AArch64::WZR;
2134 case AArch64::ANDXri:
2135 return MI.getOperand(1).getReg() == AArch64::XZR;
2136 case TargetOpcode::COPY:
2137 return MI.getOperand(1).getReg() == AArch64::WZR;
2138 }
2139 return false;
2140}
2141
2142// Return true if this instruction simply renames a general register without
2143// modifying bits.
2144bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2145 switch (MI.getOpcode()) {
2146 default:
2147 break;
2148 case TargetOpcode::COPY: {
2149 // GPR32 copies will be lowered to ORRXrs
2150 Register DstReg = MI.getOperand(0).getReg();
2151 return (AArch64::GPR32RegClass.contains(DstReg) ||
2152 AArch64::GPR64RegClass.contains(DstReg));
2153 }
2154 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2155 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2156 assert(MI.getDesc().getNumOperands() == 4 &&
2157 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2158 return true;
2159 }
2160 break;
2161 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2162 if (MI.getOperand(2).getImm() == 0) {
2163 assert(MI.getDesc().getNumOperands() == 4 &&
2164 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2165 return true;
2166 }
2167 break;
2168 }
2169 return false;
2170}
2171
2172// Return true if this instruction simply renames an FPR without modifying
2173// bits.
2174bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2175 switch (MI.getOpcode()) {
2176 default:
2177 break;
2178 case TargetOpcode::COPY: {
2179 Register DstReg = MI.getOperand(0).getReg();
2180 return AArch64::FPR128RegClass.contains(DstReg);
2181 }
2182 case AArch64::ORRv16i8:
2183 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2184 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2185 "invalid ORRv16i8 operands");
2186 return true;
2187 }
2188 break;
2189 }
2190 return false;
2191}
2192
2194 int &FrameIndex) const {
2195 switch (MI.getOpcode()) {
2196 default:
2197 break;
2198 case AArch64::LDRWui:
2199 case AArch64::LDRXui:
2200 case AArch64::LDRBui:
2201 case AArch64::LDRHui:
2202 case AArch64::LDRSui:
2203 case AArch64::LDRDui:
2204 case AArch64::LDRQui:
2205 case AArch64::LDR_PXI:
2206 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2207 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2208 FrameIndex = MI.getOperand(1).getIndex();
2209 return MI.getOperand(0).getReg();
2210 }
2211 break;
2212 }
2213
2214 return 0;
2215}
2216
2218 int &FrameIndex) const {
2219 switch (MI.getOpcode()) {
2220 default:
2221 break;
2222 case AArch64::STRWui:
2223 case AArch64::STRXui:
2224 case AArch64::STRBui:
2225 case AArch64::STRHui:
2226 case AArch64::STRSui:
2227 case AArch64::STRDui:
2228 case AArch64::STRQui:
2229 case AArch64::STR_PXI:
2230 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2231 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2232 FrameIndex = MI.getOperand(1).getIndex();
2233 return MI.getOperand(0).getReg();
2234 }
2235 break;
2236 }
2237 return 0;
2238}
2239
2240/// Check all MachineMemOperands for a hint to suppress pairing.
2241bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2242 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2243 return MMO->getFlags() & MOSuppressPair;
2244 });
2245}
2246
2247/// Set a flag on the first MachineMemOperand to suppress pairing.
2248void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2249 if (MI.memoperands_empty())
2250 return;
2251 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2252}
2253
2254/// Check all MachineMemOperands for a hint that the load/store is strided.
2255bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2256 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2257 return MMO->getFlags() & MOStridedAccess;
2258 });
2259}
2260
2261bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2262 switch (Opc) {
2263 default:
2264 return false;
2265 case AArch64::STURSi:
2266 case AArch64::STRSpre:
2267 case AArch64::STURDi:
2268 case AArch64::STRDpre:
2269 case AArch64::STURQi:
2270 case AArch64::STRQpre:
2271 case AArch64::STURBBi:
2272 case AArch64::STURHHi:
2273 case AArch64::STURWi:
2274 case AArch64::STRWpre:
2275 case AArch64::STURXi:
2276 case AArch64::STRXpre:
2277 case AArch64::LDURSi:
2278 case AArch64::LDRSpre:
2279 case AArch64::LDURDi:
2280 case AArch64::LDRDpre:
2281 case AArch64::LDURQi:
2282 case AArch64::LDRQpre:
2283 case AArch64::LDURWi:
2284 case AArch64::LDRWpre:
2285 case AArch64::LDURXi:
2286 case AArch64::LDRXpre:
2287 case AArch64::LDRSWpre:
2288 case AArch64::LDURSWi:
2289 case AArch64::LDURHHi:
2290 case AArch64::LDURBBi:
2291 case AArch64::LDURSBWi:
2292 case AArch64::LDURSHWi:
2293 return true;
2294 }
2295}
2296
2297std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2298 switch (Opc) {
2299 default: return {};
2300 case AArch64::PRFMui: return AArch64::PRFUMi;
2301 case AArch64::LDRXui: return AArch64::LDURXi;
2302 case AArch64::LDRWui: return AArch64::LDURWi;
2303 case AArch64::LDRBui: return AArch64::LDURBi;
2304 case AArch64::LDRHui: return AArch64::LDURHi;
2305 case AArch64::LDRSui: return AArch64::LDURSi;
2306 case AArch64::LDRDui: return AArch64::LDURDi;
2307 case AArch64::LDRQui: return AArch64::LDURQi;
2308 case AArch64::LDRBBui: return AArch64::LDURBBi;
2309 case AArch64::LDRHHui: return AArch64::LDURHHi;
2310 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2311 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2312 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2313 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2314 case AArch64::LDRSWui: return AArch64::LDURSWi;
2315 case AArch64::STRXui: return AArch64::STURXi;
2316 case AArch64::STRWui: return AArch64::STURWi;
2317 case AArch64::STRBui: return AArch64::STURBi;
2318 case AArch64::STRHui: return AArch64::STURHi;
2319 case AArch64::STRSui: return AArch64::STURSi;
2320 case AArch64::STRDui: return AArch64::STURDi;
2321 case AArch64::STRQui: return AArch64::STURQi;
2322 case AArch64::STRBBui: return AArch64::STURBBi;
2323 case AArch64::STRHHui: return AArch64::STURHHi;
2324 }
2325}
2326
2327unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2328 switch (Opc) {
2329 default:
2330 return 2;
2331 case AArch64::LDPXi:
2332 case AArch64::LDPDi:
2333 case AArch64::STPXi:
2334 case AArch64::STPDi:
2335 case AArch64::LDNPXi:
2336 case AArch64::LDNPDi:
2337 case AArch64::STNPXi:
2338 case AArch64::STNPDi:
2339 case AArch64::LDPQi:
2340 case AArch64::STPQi:
2341 case AArch64::LDNPQi:
2342 case AArch64::STNPQi:
2343 case AArch64::LDPWi:
2344 case AArch64::LDPSi:
2345 case AArch64::STPWi:
2346 case AArch64::STPSi:
2347 case AArch64::LDNPWi:
2348 case AArch64::LDNPSi:
2349 case AArch64::STNPWi:
2350 case AArch64::STNPSi:
2351 case AArch64::LDG:
2352 case AArch64::STGPi:
2353
2354 case AArch64::LD1B_IMM:
2355 case AArch64::LD1B_H_IMM:
2356 case AArch64::LD1B_S_IMM:
2357 case AArch64::LD1B_D_IMM:
2358 case AArch64::LD1SB_H_IMM:
2359 case AArch64::LD1SB_S_IMM:
2360 case AArch64::LD1SB_D_IMM:
2361 case AArch64::LD1H_IMM:
2362 case AArch64::LD1H_S_IMM:
2363 case AArch64::LD1H_D_IMM:
2364 case AArch64::LD1SH_S_IMM:
2365 case AArch64::LD1SH_D_IMM:
2366 case AArch64::LD1W_IMM:
2367 case AArch64::LD1W_D_IMM:
2368 case AArch64::LD1SW_D_IMM:
2369 case AArch64::LD1D_IMM:
2370
2371 case AArch64::LD2B_IMM:
2372 case AArch64::LD2H_IMM:
2373 case AArch64::LD2W_IMM:
2374 case AArch64::LD2D_IMM:
2375 case AArch64::LD3B_IMM:
2376 case AArch64::LD3H_IMM:
2377 case AArch64::LD3W_IMM:
2378 case AArch64::LD3D_IMM:
2379 case AArch64::LD4B_IMM:
2380 case AArch64::LD4H_IMM:
2381 case AArch64::LD4W_IMM:
2382 case AArch64::LD4D_IMM:
2383
2384 case AArch64::ST1B_IMM:
2385 case AArch64::ST1B_H_IMM:
2386 case AArch64::ST1B_S_IMM:
2387 case AArch64::ST1B_D_IMM:
2388 case AArch64::ST1H_IMM:
2389 case AArch64::ST1H_S_IMM:
2390 case AArch64::ST1H_D_IMM:
2391 case AArch64::ST1W_IMM:
2392 case AArch64::ST1W_D_IMM:
2393 case AArch64::ST1D_IMM:
2394
2395 case AArch64::ST2B_IMM:
2396 case AArch64::ST2H_IMM:
2397 case AArch64::ST2W_IMM:
2398 case AArch64::ST2D_IMM:
2399 case AArch64::ST3B_IMM:
2400 case AArch64::ST3H_IMM:
2401 case AArch64::ST3W_IMM:
2402 case AArch64::ST3D_IMM:
2403 case AArch64::ST4B_IMM:
2404 case AArch64::ST4H_IMM:
2405 case AArch64::ST4W_IMM:
2406 case AArch64::ST4D_IMM:
2407
2408 case AArch64::LD1RB_IMM:
2409 case AArch64::LD1RB_H_IMM:
2410 case AArch64::LD1RB_S_IMM:
2411 case AArch64::LD1RB_D_IMM:
2412 case AArch64::LD1RSB_H_IMM:
2413 case AArch64::LD1RSB_S_IMM:
2414 case AArch64::LD1RSB_D_IMM:
2415 case AArch64::LD1RH_IMM:
2416 case AArch64::LD1RH_S_IMM:
2417 case AArch64::LD1RH_D_IMM:
2418 case AArch64::LD1RSH_S_IMM:
2419 case AArch64::LD1RSH_D_IMM:
2420 case AArch64::LD1RW_IMM:
2421 case AArch64::LD1RW_D_IMM:
2422 case AArch64::LD1RSW_IMM:
2423 case AArch64::LD1RD_IMM:
2424
2425 case AArch64::LDNT1B_ZRI:
2426 case AArch64::LDNT1H_ZRI:
2427 case AArch64::LDNT1W_ZRI:
2428 case AArch64::LDNT1D_ZRI:
2429 case AArch64::STNT1B_ZRI:
2430 case AArch64::STNT1H_ZRI:
2431 case AArch64::STNT1W_ZRI:
2432 case AArch64::STNT1D_ZRI:
2433
2434 case AArch64::LDNF1B_IMM:
2435 case AArch64::LDNF1B_H_IMM:
2436 case AArch64::LDNF1B_S_IMM:
2437 case AArch64::LDNF1B_D_IMM:
2438 case AArch64::LDNF1SB_H_IMM:
2439 case AArch64::LDNF1SB_S_IMM:
2440 case AArch64::LDNF1SB_D_IMM:
2441 case AArch64::LDNF1H_IMM:
2442 case AArch64::LDNF1H_S_IMM:
2443 case AArch64::LDNF1H_D_IMM:
2444 case AArch64::LDNF1SH_S_IMM:
2445 case AArch64::LDNF1SH_D_IMM:
2446 case AArch64::LDNF1W_IMM:
2447 case AArch64::LDNF1W_D_IMM:
2448 case AArch64::LDNF1SW_D_IMM:
2449 case AArch64::LDNF1D_IMM:
2450 return 3;
2451 case AArch64::ADDG:
2452 case AArch64::STGi:
2453 case AArch64::LDR_PXI:
2454 case AArch64::STR_PXI:
2455 return 2;
2456 }
2457}
2458
2459bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2460 switch (MI.getOpcode()) {
2461 default:
2462 return false;
2463 // Scaled instructions.
2464 case AArch64::STRSui:
2465 case AArch64::STRDui:
2466 case AArch64::STRQui:
2467 case AArch64::STRXui:
2468 case AArch64::STRWui:
2469 case AArch64::LDRSui:
2470 case AArch64::LDRDui:
2471 case AArch64::LDRQui:
2472 case AArch64::LDRXui:
2473 case AArch64::LDRWui:
2474 case AArch64::LDRSWui:
2475 // Unscaled instructions.
2476 case AArch64::STURSi:
2477 case AArch64::STRSpre:
2478 case AArch64::STURDi:
2479 case AArch64::STRDpre:
2480 case AArch64::STURQi:
2481 case AArch64::STRQpre:
2482 case AArch64::STURWi:
2483 case AArch64::STRWpre:
2484 case AArch64::STURXi:
2485 case AArch64::STRXpre:
2486 case AArch64::LDURSi:
2487 case AArch64::LDRSpre:
2488 case AArch64::LDURDi:
2489 case AArch64::LDRDpre:
2490 case AArch64::LDURQi:
2491 case AArch64::LDRQpre:
2492 case AArch64::LDURWi:
2493 case AArch64::LDRWpre:
2494 case AArch64::LDURXi:
2495 case AArch64::LDRXpre:
2496 case AArch64::LDURSWi:
2497 case AArch64::LDRSWpre:
2498 return true;
2499 }
2500}
2501
2502bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2503 switch (MI.getOpcode()) {
2504 default:
2505 assert((!MI.isCall() || !MI.isReturn()) &&
2506 "Unexpected instruction - was a new tail call opcode introduced?");
2507 return false;
2508 case AArch64::TCRETURNdi:
2509 case AArch64::TCRETURNri:
2510 case AArch64::TCRETURNrix16x17:
2511 case AArch64::TCRETURNrix17:
2512 case AArch64::TCRETURNrinotx16:
2513 case AArch64::TCRETURNriALL:
2514 return true;
2515 }
2516}
2517
2518unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2519 switch (Opc) {
2520 default:
2521 llvm_unreachable("Opcode has no flag setting equivalent!");
2522 // 32-bit cases:
2523 case AArch64::ADDWri:
2524 return AArch64::ADDSWri;
2525 case AArch64::ADDWrr:
2526 return AArch64::ADDSWrr;
2527 case AArch64::ADDWrs:
2528 return AArch64::ADDSWrs;
2529 case AArch64::ADDWrx:
2530 return AArch64::ADDSWrx;
2531 case AArch64::ANDWri:
2532 return AArch64::ANDSWri;
2533 case AArch64::ANDWrr:
2534 return AArch64::ANDSWrr;
2535 case AArch64::ANDWrs:
2536 return AArch64::ANDSWrs;
2537 case AArch64::BICWrr:
2538 return AArch64::BICSWrr;
2539 case AArch64::BICWrs:
2540 return AArch64::BICSWrs;
2541 case AArch64::SUBWri:
2542 return AArch64::SUBSWri;
2543 case AArch64::SUBWrr:
2544 return AArch64::SUBSWrr;
2545 case AArch64::SUBWrs:
2546 return AArch64::SUBSWrs;
2547 case AArch64::SUBWrx:
2548 return AArch64::SUBSWrx;
2549 // 64-bit cases:
2550 case AArch64::ADDXri:
2551 return AArch64::ADDSXri;
2552 case AArch64::ADDXrr:
2553 return AArch64::ADDSXrr;
2554 case AArch64::ADDXrs:
2555 return AArch64::ADDSXrs;
2556 case AArch64::ADDXrx:
2557 return AArch64::ADDSXrx;
2558 case AArch64::ANDXri:
2559 return AArch64::ANDSXri;
2560 case AArch64::ANDXrr:
2561 return AArch64::ANDSXrr;
2562 case AArch64::ANDXrs:
2563 return AArch64::ANDSXrs;
2564 case AArch64::BICXrr:
2565 return AArch64::BICSXrr;
2566 case AArch64::BICXrs:
2567 return AArch64::BICSXrs;
2568 case AArch64::SUBXri:
2569 return AArch64::SUBSXri;
2570 case AArch64::SUBXrr:
2571 return AArch64::SUBSXrr;
2572 case AArch64::SUBXrs:
2573 return AArch64::SUBSXrs;
2574 case AArch64::SUBXrx:
2575 return AArch64::SUBSXrx;
2576 // SVE instructions:
2577 case AArch64::AND_PPzPP:
2578 return AArch64::ANDS_PPzPP;
2579 case AArch64::BIC_PPzPP:
2580 return AArch64::BICS_PPzPP;
2581 case AArch64::EOR_PPzPP:
2582 return AArch64::EORS_PPzPP;
2583 case AArch64::NAND_PPzPP:
2584 return AArch64::NANDS_PPzPP;
2585 case AArch64::NOR_PPzPP:
2586 return AArch64::NORS_PPzPP;
2587 case AArch64::ORN_PPzPP:
2588 return AArch64::ORNS_PPzPP;
2589 case AArch64::ORR_PPzPP:
2590 return AArch64::ORRS_PPzPP;
2591 case AArch64::BRKA_PPzP:
2592 return AArch64::BRKAS_PPzP;
2593 case AArch64::BRKPA_PPzPP:
2594 return AArch64::BRKPAS_PPzPP;
2595 case AArch64::BRKB_PPzP:
2596 return AArch64::BRKBS_PPzP;
2597 case AArch64::BRKPB_PPzPP:
2598 return AArch64::BRKPBS_PPzPP;
2599 case AArch64::BRKN_PPzP:
2600 return AArch64::BRKNS_PPzP;
2601 case AArch64::RDFFR_PPz:
2602 return AArch64::RDFFRS_PPz;
2603 case AArch64::PTRUE_B:
2604 return AArch64::PTRUES_B;
2605 }
2606}
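// For example (editorial note): convertToFlagSettingOpc(AArch64::ADDWri)
// returns AArch64::ADDSWri, i.e. "add w0, w1, #1" becomes "adds w0, w1, #1",
// which is the same instruction except that it also defines NZCV.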
2607
2608// Is this a candidate for ld/st merging or pairing? For example, we don't
2609// touch volatiles or load/stores that have a hint to avoid pair formation.
2610bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2611
2612 bool IsPreLdSt = isPreLdSt(MI);
2613
2614 // If this is a volatile load/store, don't mess with it.
2615 if (MI.hasOrderedMemoryRef())
2616 return false;
2617
2618 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2619 // For Pre-inc LD/ST, the operand is shifted by one.
2620 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2621 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2622 "Expected a reg or frame index operand.");
2623
2624 // For Pre-indexed addressing quadword instructions, the third operand is the
2625 // immediate value.
2626 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2627
2628 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2629 return false;
2630
2631 // Can't merge/pair if the instruction modifies the base register.
2632 // e.g., ldr x0, [x0]
2633 // This case will never occur with an FI base.
2634 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2635 // STR<S,D,Q,W,X>pre, it can be merged.
2636 // For example:
2637 // ldr q0, [x11, #32]!
2638 // ldr q1, [x11, #16]
2639 // to
2640 // ldp q0, q1, [x11, #32]!
2641 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2642 Register BaseReg = MI.getOperand(1).getReg();
2643 const TargetRegisterInfo *TRI = &getRegisterInfo();
2644 if (MI.modifiesRegister(BaseReg, TRI))
2645 return false;
2646 }
2647
2648 // Check if this load/store has a hint to avoid pair formation.
2649 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2650 if (isLdStPairSuppressed(MI))
2651 return false;
2652
2653 // Do not pair any callee-save store/reload instructions in the
2654 // prologue/epilogue if the CFI information encoded the operations as separate
2655 // instructions, as that will cause the size of the actual prologue to mismatch
2656 // with the prologue size recorded in the Windows CFI.
2657 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2658 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2659 MI.getMF()->getFunction().needsUnwindTableEntry();
2660 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2661 MI.getFlag(MachineInstr::FrameDestroy)))
2662 return false;
2663
2664 // On some CPUs quad load/store pairs are slower than two single load/stores.
2665 if (Subtarget.isPaired128Slow()) {
2666 switch (MI.getOpcode()) {
2667 default:
2668 break;
2669 case AArch64::LDURQi:
2670 case AArch64::STURQi:
2671 case AArch64::LDRQui:
2672 case AArch64::STRQui:
2673 return false;
2674 }
2675 }
2676
2677 return true;
2678}
2679
2680bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2681 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2682 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2683 const TargetRegisterInfo *TRI) const {
2684 if (!LdSt.mayLoadOrStore())
2685 return false;
2686
2687 const MachineOperand *BaseOp;
2688 TypeSize WidthN(0, false);
2689 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2690 WidthN, TRI))
2691 return false;
2692 // The maximum vscale is 16 under AArch64; return the maximal extent for the
2693 // vector.
2694 Width = LocationSize::precise(WidthN);
2695 BaseOps.push_back(BaseOp);
2696 return true;
2697}
2698
2699std::optional<ExtAddrMode>
2700AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2701 const TargetRegisterInfo *TRI) const {
2702 const MachineOperand *Base; // Filled with the base operand of MI.
2703 int64_t Offset; // Filled with the offset of MI.
2704 bool OffsetIsScalable;
2705 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2706 return std::nullopt;
2707
2708 if (!Base->isReg())
2709 return std::nullopt;
2710 ExtAddrMode AM;
2711 AM.BaseReg = Base->getReg();
2712 AM.Displacement = Offset;
2713 AM.ScaledReg = 0;
2714 AM.Scale = 0;
2715 return AM;
2716}
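// Illustrative example (editorial note, not from the upstream file): for a
// scaled load such as "ldr x0, [x1, #16]" (LDRXui with immediate operand 2),
// the ExtAddrMode returned above has BaseReg = x1, Displacement = 16 (the
// immediate multiplied by the scale of 8), ScaledReg = 0 and Scale = 0.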
2717
2718bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2719 Register Reg,
2720 const MachineInstr &AddrI,
2721 ExtAddrMode &AM) const {
2722 // Filter out instructions into which we cannot fold.
2723 unsigned NumBytes;
2724 int64_t OffsetScale = 1;
2725 switch (MemI.getOpcode()) {
2726 default:
2727 return false;
2728
2729 case AArch64::LDURQi:
2730 case AArch64::STURQi:
2731 NumBytes = 16;
2732 break;
2733
2734 case AArch64::LDURDi:
2735 case AArch64::STURDi:
2736 case AArch64::LDURXi:
2737 case AArch64::STURXi:
2738 NumBytes = 8;
2739 break;
2740
2741 case AArch64::LDURWi:
2742 case AArch64::LDURSWi:
2743 case AArch64::STURWi:
2744 NumBytes = 4;
2745 break;
2746
2747 case AArch64::LDURHi:
2748 case AArch64::STURHi:
2749 case AArch64::LDURHHi:
2750 case AArch64::STURHHi:
2751 case AArch64::LDURSHXi:
2752 case AArch64::LDURSHWi:
2753 NumBytes = 2;
2754 break;
2755
2756 case AArch64::LDRBroX:
2757 case AArch64::LDRBBroX:
2758 case AArch64::LDRSBXroX:
2759 case AArch64::LDRSBWroX:
2760 case AArch64::STRBroX:
2761 case AArch64::STRBBroX:
2762 case AArch64::LDURBi:
2763 case AArch64::LDURBBi:
2764 case AArch64::LDURSBXi:
2765 case AArch64::LDURSBWi:
2766 case AArch64::STURBi:
2767 case AArch64::STURBBi:
2768 case AArch64::LDRBui:
2769 case AArch64::LDRBBui:
2770 case AArch64::LDRSBXui:
2771 case AArch64::LDRSBWui:
2772 case AArch64::STRBui:
2773 case AArch64::STRBBui:
2774 NumBytes = 1;
2775 break;
2776
2777 case AArch64::LDRQroX:
2778 case AArch64::STRQroX:
2779 case AArch64::LDRQui:
2780 case AArch64::STRQui:
2781 NumBytes = 16;
2782 OffsetScale = 16;
2783 break;
2784
2785 case AArch64::LDRDroX:
2786 case AArch64::STRDroX:
2787 case AArch64::LDRXroX:
2788 case AArch64::STRXroX:
2789 case AArch64::LDRDui:
2790 case AArch64::STRDui:
2791 case AArch64::LDRXui:
2792 case AArch64::STRXui:
2793 NumBytes = 8;
2794 OffsetScale = 8;
2795 break;
2796
2797 case AArch64::LDRWroX:
2798 case AArch64::LDRSWroX:
2799 case AArch64::STRWroX:
2800 case AArch64::LDRWui:
2801 case AArch64::LDRSWui:
2802 case AArch64::STRWui:
2803 NumBytes = 4;
2804 OffsetScale = 4;
2805 break;
2806
2807 case AArch64::LDRHroX:
2808 case AArch64::STRHroX:
2809 case AArch64::LDRHHroX:
2810 case AArch64::STRHHroX:
2811 case AArch64::LDRSHXroX:
2812 case AArch64::LDRSHWroX:
2813 case AArch64::LDRHui:
2814 case AArch64::STRHui:
2815 case AArch64::LDRHHui:
2816 case AArch64::STRHHui:
2817 case AArch64::LDRSHXui:
2818 case AArch64::LDRSHWui:
2819 NumBytes = 2;
2820 OffsetScale = 2;
2821 break;
2822 }
2823
2824 // Check the fold operand is not the loaded/stored value.
2825 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2826 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2827 return false;
2828
2829 // Handle memory instructions with a [Reg, Reg] addressing mode.
2830 if (MemI.getOperand(2).isReg()) {
2831 // Bail if the addressing mode already includes extension of the offset
2832 // register.
2833 if (MemI.getOperand(3).getImm())
2834 return false;
2835
2836 // Check if we actually have a scaled offset.
2837 if (MemI.getOperand(4).getImm() == 0)
2838 OffsetScale = 1;
2839
2840 // If the address instruction is folded into the base register, then the
2841 // addressing mode must not have a scale. Then we can swap the base and the
2842 // scaled registers.
2843 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
2844 return false;
2845
2846 switch (AddrI.getOpcode()) {
2847 default:
2848 return false;
2849
2850 case AArch64::SBFMXri:
2851 // sxtw Xa, Wm
2852 // ldr Xd, [Xn, Xa, lsl #N]
2853 // ->
2854 // ldr Xd, [Xn, Wm, sxtw #N]
2855 if (AddrI.getOperand(2).getImm() != 0 ||
2856 AddrI.getOperand(3).getImm() != 31)
2857 return false;
2858
2859 AM.BaseReg = MemI.getOperand(1).getReg();
2860 if (AM.BaseReg == Reg)
2861 AM.BaseReg = MemI.getOperand(2).getReg();
2862 AM.ScaledReg = AddrI.getOperand(1).getReg();
2863 AM.Scale = OffsetScale;
2864 AM.Displacement = 0;
2865 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
2866 return true;
2867
2868 case TargetOpcode::SUBREG_TO_REG: {
2869 // mov Wa, Wm
2870 // ldr Xd, [Xn, Xa, lsl #N]
2871 // ->
2872 // ldr Xd, [Xn, Wm, uxtw #N]
2873
2874 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
2875 if (AddrI.getOperand(1).getImm() != 0 ||
2876 AddrI.getOperand(3).getImm() != AArch64::sub_32)
2877 return false;
2878
2879 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
2880 Register OffsetReg = AddrI.getOperand(2).getReg();
2881 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
2882 return false;
2883
2884 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
2885 if (DefMI.getOpcode() != AArch64::ORRWrs ||
2886 DefMI.getOperand(1).getReg() != AArch64::WZR ||
2887 DefMI.getOperand(3).getImm() != 0)
2888 return false;
2889
2890 AM.BaseReg = MemI.getOperand(1).getReg();
2891 if (AM.BaseReg == Reg)
2892 AM.BaseReg = MemI.getOperand(2).getReg();
2893 AM.ScaledReg = DefMI.getOperand(2).getReg();
2894 AM.Scale = OffsetScale;
2895 AM.Displacement = 0;
2896 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
2897 return true;
2898 }
2899 }
2900 }
2901
2902 // Handle memory instructions with a [Reg, #Imm] addressing mode.
2903
2904 // Check we are not breaking a potential conversion to an LDP.
2905 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
2906 int64_t NewOffset) -> bool {
2907 int64_t MinOffset, MaxOffset;
2908 switch (NumBytes) {
2909 default:
2910 return true;
2911 case 4:
2912 MinOffset = -256;
2913 MaxOffset = 252;
2914 break;
2915 case 8:
2916 MinOffset = -512;
2917 MaxOffset = 504;
2918 break;
2919 case 16:
2920 MinOffset = -1024;
2921 MaxOffset = 1008;
2922 break;
2923 }
2924 return OldOffset < MinOffset || OldOffset > MaxOffset ||
2925 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
2926 };
2927 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
2928 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
2929 int64_t NewOffset = OldOffset + Disp;
2930 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
2931 return false;
2932 // If the old offset would fit into an LDP, but the new offset wouldn't,
2933 // bail out.
2934 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
2935 return false;
2936 AM.BaseReg = AddrI.getOperand(1).getReg();
2937 AM.ScaledReg = 0;
2938 AM.Scale = 0;
2939 AM.Displacement = NewOffset;
2940 AM.Form = ExtAddrMode::Formula::Basic;
2941 return true;
2942 };
2943
2944 auto canFoldAddRegIntoAddrMode =
2945 [&](int64_t Scale,
2946 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
2947 if (MemI.getOperand(2).getImm() != 0)
2948 return false;
2949 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
2950 return false;
2951 AM.BaseReg = AddrI.getOperand(1).getReg();
2952 AM.ScaledReg = AddrI.getOperand(2).getReg();
2953 AM.Scale = Scale;
2954 AM.Displacement = 0;
2955 AM.Form = Form;
2956 return true;
2957 };
2958
2959 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
2960 unsigned Opcode = MemI.getOpcode();
2961 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
2962 Subtarget.isSTRQroSlow();
2963 };
2964
2965 int64_t Disp = 0;
2966 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
2967 switch (AddrI.getOpcode()) {
2968 default:
2969 return false;
2970
2971 case AArch64::ADDXri:
2972 // add Xa, Xn, #N
2973 // ldr Xd, [Xa, #M]
2974 // ->
2975 // ldr Xd, [Xn, #N'+M]
2976 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2977 return canFoldAddSubImmIntoAddrMode(Disp);
2978
2979 case AArch64::SUBXri:
2980 // sub Xa, Xn, #N
2981 // ldr Xd, [Xa, #M]
2982 // ->
2983 // ldr Xd, [Xn, #N'+M]
2984 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2985 return canFoldAddSubImmIntoAddrMode(-Disp);
2986
2987 case AArch64::ADDXrs: {
2988 // add Xa, Xn, Xm, lsl #N
2989 // ldr Xd, [Xa]
2990 // ->
2991 // ldr Xd, [Xn, Xm, lsl #N]
2992
2993 // Don't fold the add if the result would be slower, unless optimising for
2994 // size.
2995 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
2996 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
2997 return false;
2998 Shift = AArch64_AM::getShiftValue(Shift);
2999 if (!OptSize) {
3000 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3001 return false;
3002 if (avoidSlowSTRQ(MemI))
3003 return false;
3004 }
3005 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3006 }
3007
3008 case AArch64::ADDXrr:
3009 // add Xa, Xn, Xm
3010 // ldr Xd, [Xa]
3011 // ->
3012 // ldr Xd, [Xn, Xm, lsl #0]
3013
3014 // Don't fold the add if the result would be slower, unless optimising for
3015 // size.
3016 if (!OptSize && avoidSlowSTRQ(MemI))
3017 return false;
3018 return canFoldAddRegIntoAddrMode(1);
3019
3020 case AArch64::ADDXrx:
3021 // add Xa, Xn, Wm, {s,u}xtw #N
3022 // ldr Xd, [Xa]
3023 // ->
3024 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3025
3026 // Don't fold the add if the result would be slower, unless optimising for
3027 // size.
3028 if (!OptSize && avoidSlowSTRQ(MemI))
3029 return false;
3030
3031 // Can fold only sign-/zero-extend of a word.
3032 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3033 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3034 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3035 return false;
3036
3037 return canFoldAddRegIntoAddrMode(
3038 1ULL << AArch64_AM::getArithShiftValue(Imm),
3039 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3040 : ExtAddrMode::Formula::ZExtScaledReg);
3041 }
3042}
3043
3044// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3045// return the opcode of an instruction performing the same operation, but using
3046// the [Reg, Reg] addressing mode.
3047static unsigned regOffsetOpcode(unsigned Opcode) {
3048 switch (Opcode) {
3049 default:
3050 llvm_unreachable("Address folding not implemented for instruction");
3051
3052 case AArch64::LDURQi:
3053 case AArch64::LDRQui:
3054 return AArch64::LDRQroX;
3055 case AArch64::STURQi:
3056 case AArch64::STRQui:
3057 return AArch64::STRQroX;
3058 case AArch64::LDURDi:
3059 case AArch64::LDRDui:
3060 return AArch64::LDRDroX;
3061 case AArch64::STURDi:
3062 case AArch64::STRDui:
3063 return AArch64::STRDroX;
3064 case AArch64::LDURXi:
3065 case AArch64::LDRXui:
3066 return AArch64::LDRXroX;
3067 case AArch64::STURXi:
3068 case AArch64::STRXui:
3069 return AArch64::STRXroX;
3070 case AArch64::LDURWi:
3071 case AArch64::LDRWui:
3072 return AArch64::LDRWroX;
3073 case AArch64::LDURSWi:
3074 case AArch64::LDRSWui:
3075 return AArch64::LDRSWroX;
3076 case AArch64::STURWi:
3077 case AArch64::STRWui:
3078 return AArch64::STRWroX;
3079 case AArch64::LDURHi:
3080 case AArch64::LDRHui:
3081 return AArch64::LDRHroX;
3082 case AArch64::STURHi:
3083 case AArch64::STRHui:
3084 return AArch64::STRHroX;
3085 case AArch64::LDURHHi:
3086 case AArch64::LDRHHui:
3087 return AArch64::LDRHHroX;
3088 case AArch64::STURHHi:
3089 case AArch64::STRHHui:
3090 return AArch64::STRHHroX;
3091 case AArch64::LDURSHXi:
3092 case AArch64::LDRSHXui:
3093 return AArch64::LDRSHXroX;
3094 case AArch64::LDURSHWi:
3095 case AArch64::LDRSHWui:
3096 return AArch64::LDRSHWroX;
3097 case AArch64::LDURBi:
3098 case AArch64::LDRBui:
3099 return AArch64::LDRBroX;
3100 case AArch64::LDURBBi:
3101 case AArch64::LDRBBui:
3102 return AArch64::LDRBBroX;
3103 case AArch64::LDURSBXi:
3104 case AArch64::LDRSBXui:
3105 return AArch64::LDRSBXroX;
3106 case AArch64::LDURSBWi:
3107 case AArch64::LDRSBWui:
3108 return AArch64::LDRSBWroX;
3109 case AArch64::STURBi:
3110 case AArch64::STRBui:
3111 return AArch64::STRBroX;
3112 case AArch64::STURBBi:
3113 case AArch64::STRBBui:
3114 return AArch64::STRBBroX;
3115 }
3116}
3117
3118// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3119// the opcode of an instruction performing the same operation, but using the
3120// [Reg, #Imm] addressing mode with scaled offset.
3121unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3122 switch (Opcode) {
3123 default:
3124 llvm_unreachable("Address folding not implemented for instruction");
3125
3126 case AArch64::LDURQi:
3127 Scale = 16;
3128 return AArch64::LDRQui;
3129 case AArch64::STURQi:
3130 Scale = 16;
3131 return AArch64::STRQui;
3132 case AArch64::LDURDi:
3133 Scale = 8;
3134 return AArch64::LDRDui;
3135 case AArch64::STURDi:
3136 Scale = 8;
3137 return AArch64::STRDui;
3138 case AArch64::LDURXi:
3139 Scale = 8;
3140 return AArch64::LDRXui;
3141 case AArch64::STURXi:
3142 Scale = 8;
3143 return AArch64::STRXui;
3144 case AArch64::LDURWi:
3145 Scale = 4;
3146 return AArch64::LDRWui;
3147 case AArch64::LDURSWi:
3148 Scale = 4;
3149 return AArch64::LDRSWui;
3150 case AArch64::STURWi:
3151 Scale = 4;
3152 return AArch64::STRWui;
3153 case AArch64::LDURHi:
3154 Scale = 2;
3155 return AArch64::LDRHui;
3156 case AArch64::STURHi:
3157 Scale = 2;
3158 return AArch64::STRHui;
3159 case AArch64::LDURHHi:
3160 Scale = 2;
3161 return AArch64::LDRHHui;
3162 case AArch64::STURHHi:
3163 Scale = 2;
3164 return AArch64::STRHHui;
3165 case AArch64::LDURSHXi:
3166 Scale = 2;
3167 return AArch64::LDRSHXui;
3168 case AArch64::LDURSHWi:
3169 Scale = 2;
3170 return AArch64::LDRSHWui;
3171 case AArch64::LDURBi:
3172 Scale = 1;
3173 return AArch64::LDRBui;
3174 case AArch64::LDURBBi:
3175 Scale = 1;
3176 return AArch64::LDRBBui;
3177 case AArch64::LDURSBXi:
3178 Scale = 1;
3179 return AArch64::LDRSBXui;
3180 case AArch64::LDURSBWi:
3181 Scale = 1;
3182 return AArch64::LDRSBWui;
3183 case AArch64::STURBi:
3184 Scale = 1;
3185 return AArch64::STRBui;
3186 case AArch64::STURBBi:
3187 Scale = 1;
3188 return AArch64::STRBBui;
3189 case AArch64::LDRQui:
3190 case AArch64::STRQui:
3191 Scale = 16;
3192 return Opcode;
3193 case AArch64::LDRDui:
3194 case AArch64::STRDui:
3195 case AArch64::LDRXui:
3196 case AArch64::STRXui:
3197 Scale = 8;
3198 return Opcode;
3199 case AArch64::LDRWui:
3200 case AArch64::LDRSWui:
3201 case AArch64::STRWui:
3202 Scale = 4;
3203 return Opcode;
3204 case AArch64::LDRHui:
3205 case AArch64::STRHui:
3206 case AArch64::LDRHHui:
3207 case AArch64::STRHHui:
3208 case AArch64::LDRSHXui:
3209 case AArch64::LDRSHWui:
3210 Scale = 2;
3211 return Opcode;
3212 case AArch64::LDRBui:
3213 case AArch64::LDRBBui:
3214 case AArch64::LDRSBXui:
3215 case AArch64::LDRSBWui:
3216 case AArch64::STRBui:
3217 case AArch64::STRBBui:
3218 Scale = 1;
3219 return Opcode;
3220 }
3221}
3222
3223// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3224// the opcode of an instruction performing the same operation, but using the
3225// [Reg, #Imm] addressing mode with unscaled offset.
3226unsigned unscaledOffsetOpcode(unsigned Opcode) {
3227 switch (Opcode) {
3228 default:
3229 llvm_unreachable("Address folding not implemented for instruction");
3230
3231 case AArch64::LDURQi:
3232 case AArch64::STURQi:
3233 case AArch64::LDURDi:
3234 case AArch64::STURDi:
3235 case AArch64::LDURXi:
3236 case AArch64::STURXi:
3237 case AArch64::LDURWi:
3238 case AArch64::LDURSWi:
3239 case AArch64::STURWi:
3240 case AArch64::LDURHi:
3241 case AArch64::STURHi:
3242 case AArch64::LDURHHi:
3243 case AArch64::STURHHi:
3244 case AArch64::LDURSHXi:
3245 case AArch64::LDURSHWi:
3246 case AArch64::LDURBi:
3247 case AArch64::STURBi:
3248 case AArch64::LDURBBi:
3249 case AArch64::STURBBi:
3250 case AArch64::LDURSBWi:
3251 case AArch64::LDURSBXi:
3252 return Opcode;
3253 case AArch64::LDRQui:
3254 return AArch64::LDURQi;
3255 case AArch64::STRQui:
3256 return AArch64::STURQi;
3257 case AArch64::LDRDui:
3258 return AArch64::LDURDi;
3259 case AArch64::STRDui:
3260 return AArch64::STURDi;
3261 case AArch64::LDRXui:
3262 return AArch64::LDURXi;
3263 case AArch64::STRXui:
3264 return AArch64::STURXi;
3265 case AArch64::LDRWui:
3266 return AArch64::LDURWi;
3267 case AArch64::LDRSWui:
3268 return AArch64::LDURSWi;
3269 case AArch64::STRWui:
3270 return AArch64::STURWi;
3271 case AArch64::LDRHui:
3272 return AArch64::LDURHi;
3273 case AArch64::STRHui:
3274 return AArch64::STURHi;
3275 case AArch64::LDRHHui:
3276 return AArch64::LDURHHi;
3277 case AArch64::STRHHui:
3278 return AArch64::STURHHi;
3279 case AArch64::LDRSHXui:
3280 return AArch64::LDURSHXi;
3281 case AArch64::LDRSHWui:
3282 return AArch64::LDURSHWi;
3283 case AArch64::LDRBBui:
3284 return AArch64::LDURBBi;
3285 case AArch64::LDRBui:
3286 return AArch64::LDURBi;
3287 case AArch64::STRBBui:
3288 return AArch64::STURBBi;
3289 case AArch64::STRBui:
3290 return AArch64::STURBi;
3291 case AArch64::LDRSBWui:
3292 return AArch64::LDURSBWi;
3293 case AArch64::LDRSBXui:
3294 return AArch64::LDURSBXi;
3295 }
3296}
3297
3298// Given the opcode of a memory load/store instruction, return the opcode of an
3299// instruction performing the same operation, but using
3300// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3301// offset register.
3302static unsigned offsetExtendOpcode(unsigned Opcode) {
3303 switch (Opcode) {
3304 default:
3305 llvm_unreachable("Address folding not implemented for instruction");
3306
3307 case AArch64::LDRQroX:
3308 case AArch64::LDURQi:
3309 case AArch64::LDRQui:
3310 return AArch64::LDRQroW;
3311 case AArch64::STRQroX:
3312 case AArch64::STURQi:
3313 case AArch64::STRQui:
3314 return AArch64::STRQroW;
3315 case AArch64::LDRDroX:
3316 case AArch64::LDURDi:
3317 case AArch64::LDRDui:
3318 return AArch64::LDRDroW;
3319 case AArch64::STRDroX:
3320 case AArch64::STURDi:
3321 case AArch64::STRDui:
3322 return AArch64::STRDroW;
3323 case AArch64::LDRXroX:
3324 case AArch64::LDURXi:
3325 case AArch64::LDRXui:
3326 return AArch64::LDRXroW;
3327 case AArch64::STRXroX:
3328 case AArch64::STURXi:
3329 case AArch64::STRXui:
3330 return AArch64::STRXroW;
3331 case AArch64::LDRWroX:
3332 case AArch64::LDURWi:
3333 case AArch64::LDRWui:
3334 return AArch64::LDRWroW;
3335 case AArch64::LDRSWroX:
3336 case AArch64::LDURSWi:
3337 case AArch64::LDRSWui:
3338 return AArch64::LDRSWroW;
3339 case AArch64::STRWroX:
3340 case AArch64::STURWi:
3341 case AArch64::STRWui:
3342 return AArch64::STRWroW;
3343 case AArch64::LDRHroX:
3344 case AArch64::LDURHi:
3345 case AArch64::LDRHui:
3346 return AArch64::LDRHroW;
3347 case AArch64::STRHroX:
3348 case AArch64::STURHi:
3349 case AArch64::STRHui:
3350 return AArch64::STRHroW;
3351 case AArch64::LDRHHroX:
3352 case AArch64::LDURHHi:
3353 case AArch64::LDRHHui:
3354 return AArch64::LDRHHroW;
3355 case AArch64::STRHHroX:
3356 case AArch64::STURHHi:
3357 case AArch64::STRHHui:
3358 return AArch64::STRHHroW;
3359 case AArch64::LDRSHXroX:
3360 case AArch64::LDURSHXi:
3361 case AArch64::LDRSHXui:
3362 return AArch64::LDRSHXroW;
3363 case AArch64::LDRSHWroX:
3364 case AArch64::LDURSHWi:
3365 case AArch64::LDRSHWui:
3366 return AArch64::LDRSHWroW;
3367 case AArch64::LDRBroX:
3368 case AArch64::LDURBi:
3369 case AArch64::LDRBui:
3370 return AArch64::LDRBroW;
3371 case AArch64::LDRBBroX:
3372 case AArch64::LDURBBi:
3373 case AArch64::LDRBBui:
3374 return AArch64::LDRBBroW;
3375 case AArch64::LDRSBXroX:
3376 case AArch64::LDURSBXi:
3377 case AArch64::LDRSBXui:
3378 return AArch64::LDRSBXroW;
3379 case AArch64::LDRSBWroX:
3380 case AArch64::LDURSBWi:
3381 case AArch64::LDRSBWui:
3382 return AArch64::LDRSBWroW;
3383 case AArch64::STRBroX:
3384 case AArch64::STURBi:
3385 case AArch64::STRBui:
3386 return AArch64::STRBroW;
3387 case AArch64::STRBBroX:
3388 case AArch64::STURBBi:
3389 case AArch64::STRBBui:
3390 return AArch64::STRBBroW;
3391 }
3392}
3393
3394MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3395 const ExtAddrMode &AM) const {
3396
3397 const DebugLoc &DL = MemI.getDebugLoc();
3398 MachineBasicBlock &MBB = *MemI.getParent();
3399 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3400
3401 if (AM.Form == ExtAddrMode::Formula::Basic) {
3402 if (AM.ScaledReg) {
3403 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3404 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3405 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3406 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3407 .addReg(MemI.getOperand(0).getReg(),
3408 MemI.mayLoad() ? RegState::Define : 0)
3409 .addReg(AM.BaseReg)
3410 .addReg(AM.ScaledReg)
3411 .addImm(0)
3412 .addImm(AM.Scale > 1)
3413 .setMemRefs(MemI.memoperands())
3414 .setMIFlags(MemI.getFlags());
3415 return B.getInstr();
3416 }
3417
3418 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3419 "Addressing mode not supported for folding");
3420
3421 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3422 unsigned Scale = 1;
3423 unsigned Opcode = MemI.getOpcode();
3424 if (isInt<9>(AM.Displacement))
3425 Opcode = unscaledOffsetOpcode(Opcode);
3426 else
3427 Opcode = scaledOffsetOpcode(Opcode, Scale);
3428
3429 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3430 .addReg(MemI.getOperand(0).getReg(),
3431 MemI.mayLoad() ? RegState::Define : 0)
3432 .addReg(AM.BaseReg)
3433 .addImm(AM.Displacement / Scale)
3434 .setMemRefs(MemI.memoperands())
3435 .setMIFlags(MemI.getFlags());
3436 return B.getInstr();
3437 }
3438
3439 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3440 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3441 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3442 assert(AM.ScaledReg && !AM.Displacement &&
3443 "Address offset can be a register or an immediate, but not both");
3444 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3445 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3446 // Make sure the offset register is in the correct register class.
3447 Register OffsetReg = AM.ScaledReg;
3448 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3449 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3450 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3451 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3452 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3453 }
3454 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3455 .addReg(MemI.getOperand(0).getReg(),
3456 MemI.mayLoad() ? RegState::Define : 0)
3457 .addReg(AM.BaseReg)
3458 .addReg(OffsetReg)
3459 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3460 .addImm(AM.Scale != 1)
3461 .setMemRefs(MemI.memoperands())
3462 .setMIFlags(MemI.getFlags());
3463
3464 return B.getInstr();
3465 }
3466
3467 llvm_unreachable(
3468 "Function must not be called with an addressing mode it can't handle");
3469}
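// Illustrative example (editorial sketch, not from the upstream file):
// together with canFoldIntoAddrMode above, this allows
//
//   add x8, x0, x1, lsl #3
//   ldr x2, [x8]
//
// to be rewritten, when x8 has no other uses, as
//
//   ldr x2, [x0, x1, lsl #3]
//
// by building an ExtAddrMode with BaseReg = x0, ScaledReg = x1, Scale = 8 and
// emitting the register-offset (LDRXroX) form here.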
3470
3471bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3472 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3473 bool &OffsetIsScalable, TypeSize &Width,
3474 const TargetRegisterInfo *TRI) const {
3475 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3476 // Handle only loads/stores with base register followed by immediate offset.
3477 if (LdSt.getNumExplicitOperands() == 3) {
3478 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3479 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3480 !LdSt.getOperand(2).isImm())
3481 return false;
3482 } else if (LdSt.getNumExplicitOperands() == 4) {
3483 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3484 if (!LdSt.getOperand(1).isReg() ||
3485 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3486 !LdSt.getOperand(3).isImm())
3487 return false;
3488 } else
3489 return false;
3490
3491 // Get the scaling factor for the instruction and set the width for the
3492 // instruction.
3493 TypeSize Scale(0U, false);
3494 int64_t Dummy1, Dummy2;
3495
3496 // If this returns false, then it's an instruction we don't want to handle.
3497 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3498 return false;
3499
3500 // Compute the offset. Offset is calculated as the immediate operand
3501 // multiplied by the scaling factor. Unscaled instructions have scaling factor
3502 // set to 1.
3503 if (LdSt.getNumExplicitOperands() == 3) {
3504 BaseOp = &LdSt.getOperand(1);
3505 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3506 } else {
3507 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3508 BaseOp = &LdSt.getOperand(2);
3509 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3510 }
3511 OffsetIsScalable = Scale.isScalable();
3512
3513 if (!BaseOp->isReg() && !BaseOp->isFI())
3514 return false;
3515
3516 return true;
3517}
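// Editorial sketch (not part of the upstream file): one way a hypothetical
// client could use getMemOperandWithOffsetWidth to detect two fixed-size
// accesses to adjacent locations off the same base register. The helper name
// and its use are illustrative assumptions, not an existing LLVM API.
static bool accessesAreAdjacent(const MachineInstr &A, const MachineInstr &B,
                                const AArch64InstrInfo &TII,
                                const TargetRegisterInfo *TRI) {
  const MachineOperand *BaseA, *BaseB;
  int64_t OffA, OffB;
  bool ScalableA, ScalableB;
  TypeSize WidthA(0, false), WidthB(0, false);
  if (!TII.getMemOperandWithOffsetWidth(A, BaseA, OffA, ScalableA, WidthA,
                                        TRI) ||
      !TII.getMemOperandWithOffsetWidth(B, BaseB, OffB, ScalableB, WidthB, TRI))
    return false;
  // Scalable (SVE) offsets and frame-index bases are not handled in this
  // sketch.
  if (ScalableA || ScalableB || !BaseA->isReg() || !BaseB->isReg())
    return false;
  return BaseA->getReg() == BaseB->getReg() &&
         (OffA + (int64_t)WidthA.getFixedValue() == OffB ||
          OffB + (int64_t)WidthB.getFixedValue() == OffA);
}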
3518
3521 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3522 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3523 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3524 return OfsOp;
3525}
3526
3527bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3528 TypeSize &Width, int64_t &MinOffset,
3529 int64_t &MaxOffset) {
3530 switch (Opcode) {
3531 // Not a memory operation or something we want to handle.
3532 default:
3533 Scale = TypeSize::getFixed(0);
3534 Width = TypeSize::getFixed(0);
3535 MinOffset = MaxOffset = 0;
3536 return false;
3537 case AArch64::STRWpost:
3538 case AArch64::LDRWpost:
3539 Width = TypeSize::getFixed(32);
3540 Scale = TypeSize::getFixed(4);
3541 MinOffset = -256;
3542 MaxOffset = 255;
3543 break;
3544 case AArch64::LDURQi:
3545 case AArch64::STURQi:
3546 Width = TypeSize::getFixed(16);
3547 Scale = TypeSize::getFixed(1);
3548 MinOffset = -256;
3549 MaxOffset = 255;
3550 break;
3551 case AArch64::PRFUMi:
3552 case AArch64::LDURXi:
3553 case AArch64::LDURDi:
3554 case AArch64::LDAPURXi:
3555 case AArch64::STURXi:
3556 case AArch64::STURDi:
3557 case AArch64::STLURXi:
3558 Width = TypeSize::getFixed(8);
3559 Scale = TypeSize::getFixed(1);
3560 MinOffset = -256;
3561 MaxOffset = 255;
3562 break;
3563 case AArch64::LDURWi:
3564 case AArch64::LDURSi:
3565 case AArch64::LDURSWi:
3566 case AArch64::LDAPURi:
3567 case AArch64::LDAPURSWi:
3568 case AArch64::STURWi:
3569 case AArch64::STURSi:
3570 case AArch64::STLURWi:
3571 Width = TypeSize::getFixed(4);
3572 Scale = TypeSize::getFixed(1);
3573 MinOffset = -256;
3574 MaxOffset = 255;
3575 break;
3576 case AArch64::LDURHi:
3577 case AArch64::LDURHHi:
3578 case AArch64::LDURSHXi:
3579 case AArch64::LDURSHWi:
3580 case AArch64::LDAPURHi:
3581 case AArch64::LDAPURSHWi:
3582 case AArch64::LDAPURSHXi:
3583 case AArch64::STURHi:
3584 case AArch64::STURHHi:
3585 case AArch64::STLURHi:
3586 Width = TypeSize::getFixed(2);
3587 Scale = TypeSize::getFixed(1);
3588 MinOffset = -256;
3589 MaxOffset = 255;
3590 break;
3591 case AArch64::LDURBi:
3592 case AArch64::LDURBBi:
3593 case AArch64::LDURSBXi:
3594 case AArch64::LDURSBWi:
3595 case AArch64::LDAPURBi:
3596 case AArch64::LDAPURSBWi:
3597 case AArch64::LDAPURSBXi:
3598 case AArch64::STURBi:
3599 case AArch64::STURBBi:
3600 case AArch64::STLURBi:
3601 Width = TypeSize::getFixed(1);
3602 Scale = TypeSize::getFixed(1);
3603 MinOffset = -256;
3604 MaxOffset = 255;
3605 break;
3606 case AArch64::LDPQi:
3607 case AArch64::LDNPQi:
3608 case AArch64::STPQi:
3609 case AArch64::STNPQi:
3610 Scale = TypeSize::getFixed(16);
3611 Width = TypeSize::getFixed(32);
3612 MinOffset = -64;
3613 MaxOffset = 63;
3614 break;
3615 case AArch64::LDRQui:
3616 case AArch64::STRQui:
3617 Scale = TypeSize::getFixed(16);
3618 Width = TypeSize::getFixed(16);
3619 MinOffset = 0;
3620 MaxOffset = 4095;
3621 break;
3622 case AArch64::LDPXi:
3623 case AArch64::LDPDi:
3624 case AArch64::LDNPXi:
3625 case AArch64::LDNPDi:
3626 case AArch64::STPXi:
3627 case AArch64::STPDi:
3628 case AArch64::STNPXi:
3629 case AArch64::STNPDi:
3630 Scale = TypeSize::getFixed(8);
3631 Width = TypeSize::getFixed(16);
3632 MinOffset = -64;
3633 MaxOffset = 63;
3634 break;
3635 case AArch64::PRFMui:
3636 case AArch64::LDRXui:
3637 case AArch64::LDRDui:
3638 case AArch64::STRXui:
3639 case AArch64::STRDui:
3640 Scale = TypeSize::getFixed(8);
3641 Width = TypeSize::getFixed(8);
3642 MinOffset = 0;
3643 MaxOffset = 4095;
3644 break;
3645 case AArch64::StoreSwiftAsyncContext:
3646 // Store is an STRXui, but there might be an ADDXri in the expansion too.
3647 Scale = TypeSize::getFixed(1);
3648 Width = TypeSize::getFixed(8);
3649 MinOffset = 0;
3650 MaxOffset = 4095;
3651 break;
3652 case AArch64::LDPWi:
3653 case AArch64::LDPSi:
3654 case AArch64::LDNPWi:
3655 case AArch64::LDNPSi:
3656 case AArch64::STPWi:
3657 case AArch64::STPSi:
3658 case AArch64::STNPWi:
3659 case AArch64::STNPSi:
3660 Scale = TypeSize::getFixed(4);
3661 Width = TypeSize::getFixed(8);
3662 MinOffset = -64;
3663 MaxOffset = 63;
3664 break;
3665 case AArch64::LDRWui:
3666 case AArch64::LDRSui:
3667 case AArch64::LDRSWui:
3668 case AArch64::STRWui:
3669 case AArch64::STRSui:
3670 Scale = TypeSize::getFixed(4);
3671 Width = TypeSize::getFixed(4);
3672 MinOffset = 0;
3673 MaxOffset = 4095;
3674 break;
3675 case AArch64::LDRHui:
3676 case AArch64::LDRHHui:
3677 case AArch64::LDRSHWui:
3678 case AArch64::LDRSHXui:
3679 case AArch64::STRHui:
3680 case AArch64::STRHHui:
3681 Scale = TypeSize::getFixed(2);
3682 Width = TypeSize::getFixed(2);
3683 MinOffset = 0;
3684 MaxOffset = 4095;
3685 break;
3686 case AArch64::LDRBui:
3687 case AArch64::LDRBBui:
3688 case AArch64::LDRSBWui:
3689 case AArch64::LDRSBXui:
3690 case AArch64::STRBui:
3691 case AArch64::STRBBui:
3692 Scale = TypeSize::getFixed(1);
3693 Width = TypeSize::getFixed(1);
3694 MinOffset = 0;
3695 MaxOffset = 4095;
3696 break;
3697 case AArch64::STPXpre:
3698 case AArch64::LDPXpost:
3699 case AArch64::STPDpre:
3700 case AArch64::LDPDpost:
3701 Scale = TypeSize::getFixed(8);
3702 Width = TypeSize::getFixed(8);
3703 MinOffset = -512;
3704 MaxOffset = 504;
3705 break;
3706 case AArch64::STPQpre:
3707 case AArch64::LDPQpost:
3708 Scale = TypeSize::getFixed(16);
3709 Width = TypeSize::getFixed(16);
3710 MinOffset = -1024;
3711 MaxOffset = 1008;
3712 break;
3713 case AArch64::STRXpre:
3714 case AArch64::STRDpre:
3715 case AArch64::LDRXpost:
3716 case AArch64::LDRDpost:
3717 Scale = TypeSize::getFixed(1);
3718 Width = TypeSize::getFixed(8);
3719 MinOffset = -256;
3720 MaxOffset = 255;
3721 break;
3722 case AArch64::STRQpre:
3723 case AArch64::LDRQpost:
3724 Scale = TypeSize::getFixed(1);
3725 Width = TypeSize::getFixed(16);
3726 MinOffset = -256;
3727 MaxOffset = 255;
3728 break;
3729 case AArch64::ADDG:
3730 Scale = TypeSize::getFixed(16);
3731 Width = TypeSize::getFixed(0);
3732 MinOffset = 0;
3733 MaxOffset = 63;
3734 break;
3735 case AArch64::TAGPstack:
3736 Scale = TypeSize::getFixed(16);
3737 Width = TypeSize::getFixed(0);
3738 // TAGP with a negative offset turns into SUBP, which has a maximum offset
3739 // of 63 (not 64!).
3740 MinOffset = -63;
3741 MaxOffset = 63;
3742 break;
3743 case AArch64::LDG:
3744 case AArch64::STGi:
3745 case AArch64::STZGi:
3746 Scale = TypeSize::getFixed(16);
3747 Width = TypeSize::getFixed(16);
3748 MinOffset = -256;
3749 MaxOffset = 255;
3750 break;
3751 case AArch64::STR_ZZZZXI:
3752 case AArch64::LDR_ZZZZXI:
3753 Scale = TypeSize::getScalable(16);
3754 Width = TypeSize::getScalable(16 * 4);
3755 MinOffset = -256;
3756 MaxOffset = 252;
3757 break;
3758 case AArch64::STR_ZZZXI:
3759 case AArch64::LDR_ZZZXI:
3760 Scale = TypeSize::getScalable(16);
3761 Width = TypeSize::getScalable(16 * 3);
3762 MinOffset = -256;
3763 MaxOffset = 253;
3764 break;
3765 case AArch64::STR_ZZXI:
3766 case AArch64::LDR_ZZXI:
3767 Scale = TypeSize::getScalable(16);
3768 Width = TypeSize::getScalable(16 * 2);
3769 MinOffset = -256;
3770 MaxOffset = 254;
3771 break;
3772 case AArch64::LDR_PXI:
3773 case AArch64::STR_PXI:
3774 Scale = TypeSize::getScalable(2);
3775 Width = TypeSize::getScalable(2);
3776 MinOffset = -256;
3777 MaxOffset = 255;
3778 break;
3779 case AArch64::LDR_PPXI:
3780 case AArch64::STR_PPXI:
3781 Scale = TypeSize::getScalable(2);
3782 Width = TypeSize::getScalable(2 * 2);
3783 MinOffset = -256;
3784 MaxOffset = 254;
3785 break;
3786 case AArch64::LDR_ZXI:
3787 case AArch64::STR_ZXI:
3788 Scale = TypeSize::getScalable(16);
3789 Width = TypeSize::getScalable(16);
3790 MinOffset = -256;
3791 MaxOffset = 255;
3792 break;
3793 case AArch64::LD1B_IMM:
3794 case AArch64::LD1H_IMM:
3795 case AArch64::LD1W_IMM:
3796 case AArch64::LD1D_IMM:
3797 case AArch64::LDNT1B_ZRI:
3798 case AArch64::LDNT1H_ZRI:
3799 case AArch64::LDNT1W_ZRI:
3800 case AArch64::LDNT1D_ZRI:
3801 case AArch64::ST1B_IMM:
3802 case AArch64::ST1H_IMM:
3803 case AArch64::ST1W_IMM:
3804 case AArch64::ST1D_IMM:
3805 case AArch64::STNT1B_ZRI:
3806 case AArch64::STNT1H_ZRI:
3807 case AArch64::STNT1W_ZRI:
3808 case AArch64::STNT1D_ZRI:
3809 case AArch64::LDNF1B_IMM:
3810 case AArch64::LDNF1H_IMM:
3811 case AArch64::LDNF1W_IMM:
3812 case AArch64::LDNF1D_IMM:
3813 // A full vector's worth of data
3814 // Width = mbytes * elements
3815 Scale = TypeSize::getScalable(16);
3816 Width = TypeSize::getScalable(16);
3817 MinOffset = -8;
3818 MaxOffset = 7;
3819 break;
3820 case AArch64::LD2B_IMM:
3821 case AArch64::LD2H_IMM:
3822 case AArch64::LD2W_IMM:
3823 case AArch64::LD2D_IMM:
3824 case AArch64::ST2B_IMM:
3825 case AArch64::ST2H_IMM:
3826 case AArch64::ST2W_IMM:
3827 case AArch64::ST2D_IMM:
3828 Scale = TypeSize::getScalable(32);
3829 Width = TypeSize::getScalable(16 * 2);
3830 MinOffset = -8;
3831 MaxOffset = 7;
3832 break;
3833 case AArch64::LD3B_IMM:
3834 case AArch64::LD3H_IMM:
3835 case AArch64::LD3W_IMM:
3836 case AArch64::LD3D_IMM:
3837 case AArch64::ST3B_IMM:
3838 case AArch64::ST3H_IMM:
3839 case AArch64::ST3W_IMM:
3840 case AArch64::ST3D_IMM:
3841 Scale = TypeSize::getScalable(48);
3842 Width = TypeSize::getScalable(16 * 3);
3843 MinOffset = -8;
3844 MaxOffset = 7;
3845 break;
3846 case AArch64::LD4B_IMM:
3847 case AArch64::LD4H_IMM:
3848 case AArch64::LD4W_IMM:
3849 case AArch64::LD4D_IMM:
3850 case AArch64::ST4B_IMM:
3851 case AArch64::ST4H_IMM:
3852 case AArch64::ST4W_IMM:
3853 case AArch64::ST4D_IMM:
3854 Scale = TypeSize::getScalable(64);
3855 Width = TypeSize::getScalable(16 * 4);
3856 MinOffset = -8;
3857 MaxOffset = 7;
3858 break;
3859 case AArch64::LD1B_H_IMM:
3860 case AArch64::LD1SB_H_IMM:
3861 case AArch64::LD1H_S_IMM:
3862 case AArch64::LD1SH_S_IMM:
3863 case AArch64::LD1W_D_IMM:
3864 case AArch64::LD1SW_D_IMM:
3865 case AArch64::ST1B_H_IMM:
3866 case AArch64::ST1H_S_IMM:
3867 case AArch64::ST1W_D_IMM:
3868 case AArch64::LDNF1B_H_IMM:
3869 case AArch64::LDNF1SB_H_IMM:
3870 case AArch64::LDNF1H_S_IMM:
3871 case AArch64::LDNF1SH_S_IMM:
3872 case AArch64::LDNF1W_D_IMM:
3873 case AArch64::LDNF1SW_D_IMM:
3874 // A half vector's worth of data
3875 // Width = mbytes * elements
3876 Scale = TypeSize::getScalable(8);
3877 Width = TypeSize::getScalable(8);
3878 MinOffset = -8;
3879 MaxOffset = 7;
3880 break;
3881 case AArch64::LD1B_S_IMM:
3882 case AArch64::LD1SB_S_IMM:
3883 case AArch64::LD1H_D_IMM:
3884 case AArch64::LD1SH_D_IMM:
3885 case AArch64::ST1B_S_IMM:
3886 case AArch64::ST1H_D_IMM:
3887 case AArch64::LDNF1B_S_IMM:
3888 case AArch64::LDNF1SB_S_IMM:
3889 case AArch64::LDNF1H_D_IMM:
3890 case AArch64::LDNF1SH_D_IMM:
3891 // A quarter vector's worth of data
3892 // Width = mbytes * elements
3893 Scale = TypeSize::getScalable(4);
3894 Width = TypeSize::getScalable(4);
3895 MinOffset = -8;
3896 MaxOffset = 7;
3897 break;
3898 case AArch64::LD1B_D_IMM:
3899 case AArch64::LD1SB_D_IMM:
3900 case AArch64::ST1B_D_IMM:
3901 case AArch64::LDNF1B_D_IMM:
3902 case AArch64::LDNF1SB_D_IMM:
3903 // An eighth vector's worth of data
3904 // Width = mbytes * elements
3905 Scale = TypeSize::getScalable(2);
3906 Width = TypeSize::getScalable(2);
3907 MinOffset = -8;
3908 MaxOffset = 7;
3909 break;
3910 case AArch64::ST2Gi:
3911 case AArch64::STZ2Gi:
3912 Scale = TypeSize::getFixed(16);
3913 Width = TypeSize::getFixed(32);
3914 MinOffset = -256;
3915 MaxOffset = 255;
3916 break;
3917 case AArch64::STGPi:
3918 Scale = TypeSize::getFixed(16);
3919 Width = TypeSize::getFixed(16);
3920 MinOffset = -64;
3921 MaxOffset = 63;
3922 break;
3923 case AArch64::LD1RB_IMM:
3924 case AArch64::LD1RB_H_IMM:
3925 case AArch64::LD1RB_S_IMM:
3926 case AArch64::LD1RB_D_IMM:
3927 case AArch64::LD1RSB_H_IMM:
3928 case AArch64::LD1RSB_S_IMM:
3929 case AArch64::LD1RSB_D_IMM:
3930 Scale = TypeSize::getFixed(1);
3931 Width = TypeSize::getFixed(1);
3932 MinOffset = 0;
3933 MaxOffset = 63;
3934 break;
3935 case AArch64::LD1RH_IMM:
3936 case AArch64::LD1RH_S_IMM:
3937 case AArch64::LD1RH_D_IMM:
3938 case AArch64::LD1RSH_S_IMM:
3939 case AArch64::LD1RSH_D_IMM:
3940 Scale = TypeSize::getFixed(2);
3941 Width = TypeSize::getFixed(2);
3942 MinOffset = 0;
3943 MaxOffset = 63;
3944 break;
3945 case AArch64::LD1RW_IMM:
3946 case AArch64::LD1RW_D_IMM:
3947 case AArch64::LD1RSW_IMM:
3948 Scale = TypeSize::getFixed(4);
3949 Width = TypeSize::getFixed(4);
3950 MinOffset = 0;
3951 MaxOffset = 63;
3952 break;
3953 case AArch64::LD1RD_IMM:
3954 Scale = TypeSize::getFixed(8);
3955 Width = TypeSize::getFixed(8);
3956 MinOffset = 0;
3957 MaxOffset = 63;
3958 break;
3959 }
3960
3961 return true;
3962}
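// Illustrative note (added commentary, not from the original source): the
// Min/MaxOffset values above are expressed in units of Scale, so the legal
// byte range of the addressing mode is [MinOffset * Scale, MaxOffset * Scale].
// For example, AArch64::LDRXui (Scale = 8, 0..4095) reaches byte offsets
// 0, 8, ..., 32760, while AArch64::LDPXi (Scale = 8, -64..63) reaches
// -512 .. +504 in steps of 8. Width is the total number of bytes accessed.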
3963
3964// Scaling factor for unscaled load or store.
3966 switch (Opc) {
3967 default:
3968 llvm_unreachable("Opcode has unknown scale!");
3969 case AArch64::LDRBBui:
3970 case AArch64::LDURBBi:
3971 case AArch64::LDRSBWui:
3972 case AArch64::LDURSBWi:
3973 case AArch64::STRBBui:
3974 case AArch64::STURBBi:
3975 return 1;
3976 case AArch64::LDRHHui:
3977 case AArch64::LDURHHi:
3978 case AArch64::LDRSHWui:
3979 case AArch64::LDURSHWi:
3980 case AArch64::STRHHui:
3981 case AArch64::STURHHi:
3982 return 2;
3983 case AArch64::LDRSui:
3984 case AArch64::LDURSi:
3985 case AArch64::LDRSpre:
3986 case AArch64::LDRSWui:
3987 case AArch64::LDURSWi:
3988 case AArch64::LDRSWpre:
3989 case AArch64::LDRWpre:
3990 case AArch64::LDRWui:
3991 case AArch64::LDURWi:
3992 case AArch64::STRSui:
3993 case AArch64::STURSi:
3994 case AArch64::STRSpre:
3995 case AArch64::STRWui:
3996 case AArch64::STURWi:
3997 case AArch64::STRWpre:
3998 case AArch64::LDPSi:
3999 case AArch64::LDPSWi:
4000 case AArch64::LDPWi:
4001 case AArch64::STPSi:
4002 case AArch64::STPWi:
4003 return 4;
4004 case AArch64::LDRDui:
4005 case AArch64::LDURDi:
4006 case AArch64::LDRDpre:
4007 case AArch64::LDRXui:
4008 case AArch64::LDURXi:
4009 case AArch64::LDRXpre:
4010 case AArch64::STRDui:
4011 case AArch64::STURDi:
4012 case AArch64::STRDpre:
4013 case AArch64::STRXui:
4014 case AArch64::STURXi:
4015 case AArch64::STRXpre:
4016 case AArch64::LDPDi:
4017 case AArch64::LDPXi:
4018 case AArch64::STPDi:
4019 case AArch64::STPXi:
4020 return 8;
4021 case AArch64::LDRQui:
4022 case AArch64::LDURQi:
4023 case AArch64::STRQui:
4024 case AArch64::STURQi:
4025 case AArch64::STRQpre:
4026 case AArch64::LDPQi:
4027 case AArch64::LDRQpre:
4028 case AArch64::STPQi:
4029 case AArch64::STGi:
4030 case AArch64::STZGi:
4031 case AArch64::ST2Gi:
4032 case AArch64::STZ2Gi:
4033 case AArch64::STGPi:
4034 return 16;
4035 }
4036}
4037
4038bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4039 switch (MI.getOpcode()) {
4040 default:
4041 return false;
4042 case AArch64::LDRWpre:
4043 case AArch64::LDRXpre:
4044 case AArch64::LDRSWpre:
4045 case AArch64::LDRSpre:
4046 case AArch64::LDRDpre:
4047 case AArch64::LDRQpre:
4048 return true;
4049 }
4050}
4051
4052bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4053 switch (MI.getOpcode()) {
4054 default:
4055 return false;
4056 case AArch64::STRWpre:
4057 case AArch64::STRXpre:
4058 case AArch64::STRSpre:
4059 case AArch64::STRDpre:
4060 case AArch64::STRQpre:
4061 return true;
4062 }
4063}
4064
4065bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4066 return isPreLd(MI) || isPreSt(MI);
4067}
4068
4069bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4070 switch (MI.getOpcode()) {
4071 default:
4072 return false;
4073 case AArch64::LDPSi:
4074 case AArch64::LDPSWi:
4075 case AArch64::LDPDi:
4076 case AArch64::LDPQi:
4077 case AArch64::LDPWi:
4078 case AArch64::LDPXi:
4079 case AArch64::STPSi:
4080 case AArch64::STPDi:
4081 case AArch64::STPQi:
4082 case AArch64::STPWi:
4083 case AArch64::STPXi:
4084 case AArch64::STGPi:
4085 return true;
4086 }
4087}
4088
4090 unsigned Idx =
4092 : 1;
4093 return MI.getOperand(Idx);
4094}
4095
4096const MachineOperand &
4098 unsigned Idx =
4100 : 2;
4101 return MI.getOperand(Idx);
4102}
4103
4104static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4105 Register Reg) {
4106 if (MI.getParent() == nullptr)
4107 return nullptr;
4108 const MachineFunction *MF = MI.getParent()->getParent();
4109 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4110}
4111
4113 auto IsHFPR = [&](const MachineOperand &Op) {
4114 if (!Op.isReg())
4115 return false;
4116 auto Reg = Op.getReg();
4117 if (Reg.isPhysical())
4118 return AArch64::FPR16RegClass.contains(Reg);
4119 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4120 return TRC == &AArch64::FPR16RegClass ||
4121 TRC == &AArch64::FPR16_loRegClass;
4122 };
4123 return llvm::any_of(MI.operands(), IsHFPR);
4124}
4125
4127 auto IsQFPR = [&](const MachineOperand &Op) {
4128 if (!Op.isReg())
4129 return false;
4130 auto Reg = Op.getReg();
4131 if (Reg.isPhysical())
4132 return AArch64::FPR128RegClass.contains(Reg);
4133 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4134 return TRC == &AArch64::FPR128RegClass ||
4135 TRC == &AArch64::FPR128_loRegClass;
4136 };
4137 return llvm::any_of(MI.operands(), IsQFPR);
4138}
4139
4141 switch (MI.getOpcode()) {
4142 case AArch64::BRK:
4143 case AArch64::HLT:
4144 case AArch64::PACIASP:
4145 case AArch64::PACIBSP:
4146 // Implicit BTI behavior.
4147 return true;
4148 case AArch64::PAUTH_PROLOGUE:
4149 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4150 return true;
4151 case AArch64::HINT: {
4152 unsigned Imm = MI.getOperand(0).getImm();
4153 // Explicit BTI instruction.
4154 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4155 return true;
4156 // PACI(A|B)SP instructions.
4157 if (Imm == 25 || Imm == 27)
4158 return true;
4159 return false;
4160 }
4161 default:
4162 return false;
4163 }
4164}
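// Illustrative note (added commentary, not from the original source): the raw
// HINT encodings cover the aliases, e.g. HINT #34 is "bti c" and HINT #25 is
// "paciasp", so both forms are treated as having BTI semantics here.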
4165
4167 auto IsFPR = [&](const MachineOperand &Op) {
4168 if (!Op.isReg())
4169 return false;
4170 auto Reg = Op.getReg();
4171 if (Reg.isPhysical())
4172 return AArch64::FPR128RegClass.contains(Reg) ||
4173 AArch64::FPR64RegClass.contains(Reg) ||
4174 AArch64::FPR32RegClass.contains(Reg) ||
4175 AArch64::FPR16RegClass.contains(Reg) ||
4176 AArch64::FPR8RegClass.contains(Reg);
4177
4178 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4179 return TRC == &AArch64::FPR128RegClass ||
4180 TRC == &AArch64::FPR128_loRegClass ||
4181 TRC == &AArch64::FPR64RegClass ||
4182 TRC == &AArch64::FPR64_loRegClass ||
4183 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4184 TRC == &AArch64::FPR8RegClass;
4185 };
4186 return llvm::any_of(MI.operands(), IsFPR);
4187}
4188
4189// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4190// scaled.
4191static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4192 int Scale = AArch64InstrInfo::getMemScale(Opc);
4193
4194 // If the byte-offset isn't a multiple of the stride, we can't scale this
4195 // offset.
4196 if (Offset % Scale != 0)
4197 return false;
4198
4199 // Convert the byte offset used by the unscaled instruction into the
4200 // "element" offset used by the scaled pair load/store instructions.
4201 Offset /= Scale;
4202 return true;
4203}
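// Illustrative sketch (hypothetical values, not from the original source):
// AArch64::STURXi has a memory scale of 8, so a byte offset of 16 scales to
// an element offset of 2, while a byte offset of 12 is rejected because it
// is not a multiple of the scale:
//
//   int64_t Off = 16;
//   bool OK = scaleOffset(AArch64::STURXi, Off); // OK == true,  Off == 2
//   Off = 12;
//   OK = scaleOffset(AArch64::STURXi, Off);      // OK == false, Off == 12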
4204
4205static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4206 if (FirstOpc == SecondOpc)
4207 return true;
4208 // We can also pair sign-ext and zero-ext instructions.
4209 switch (FirstOpc) {
4210 default:
4211 return false;
4212 case AArch64::STRSui:
4213 case AArch64::STURSi:
4214 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4215 case AArch64::STRDui:
4216 case AArch64::STURDi:
4217 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4218 case AArch64::STRQui:
4219 case AArch64::STURQi:
4220 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4221 case AArch64::STRWui:
4222 case AArch64::STURWi:
4223 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4224 case AArch64::STRXui:
4225 case AArch64::STURXi:
4226 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4227 case AArch64::LDRSui:
4228 case AArch64::LDURSi:
4229 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4230 case AArch64::LDRDui:
4231 case AArch64::LDURDi:
4232 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4233 case AArch64::LDRQui:
4234 case AArch64::LDURQi:
4235 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4236 case AArch64::LDRWui:
4237 case AArch64::LDURWi:
4238 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4239 case AArch64::LDRSWui:
4240 case AArch64::LDURSWi:
4241 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4242 case AArch64::LDRXui:
4243 case AArch64::LDURXi:
4244 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4245 }
4246 // These instructions can't be paired based on their opcodes.
4247 return false;
4248}
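// Illustrative note (added commentary, not from the original source):
// identical opcodes always pair, and 32-bit loads may pair across the
// sign/zero-extending variants, e.g.
//
//   canPairLdStOpc(AArch64::LDRWui, AArch64::LDRWui);  // true
//   canPairLdStOpc(AArch64::LDRWui, AArch64::LDRSWui); // true
//   canPairLdStOpc(AArch64::LDRWui, AArch64::LDRXui);  // false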
4249
4250static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4251 int64_t Offset1, unsigned Opcode1, int FI2,
4252 int64_t Offset2, unsigned Opcode2) {
4253 // Accesses through fixed stack object frame indices may access a different
4254 // fixed stack slot. Check that the object offsets + offsets match.
4255 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4256 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4257 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4258 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4259 // Convert to scaled object offsets.
4260 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4261 if (ObjectOffset1 % Scale1 != 0)
4262 return false;
4263 ObjectOffset1 /= Scale1;
4264 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4265 if (ObjectOffset2 % Scale2 != 0)
4266 return false;
4267 ObjectOffset2 /= Scale2;
4268 ObjectOffset1 += Offset1;
4269 ObjectOffset2 += Offset2;
4270 return ObjectOffset1 + 1 == ObjectOffset2;
4271 }
4272
4273 return FI1 == FI2;
4274}
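// Worked example (hypothetical frame layout, not from the original source):
// two fixed 8-byte stack objects at object offsets 0 and 8, both accessed by
// AArch64::LDRXui with an instruction offset of 0. Scaling by
// getMemScale(LDRXui) == 8 gives scaled object offsets 0 and 1, so
// ObjectOffset1 + 1 == ObjectOffset2 and the two accesses may be clustered.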
4275
4276/// Detect opportunities for ldp/stp formation.
4277///
4278/// Only called for LdSt for which getMemOperandWithOffset returns true.
4280 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4281 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4282 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4283 unsigned NumBytes) const {
4284 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4285 const MachineOperand &BaseOp1 = *BaseOps1.front();
4286 const MachineOperand &BaseOp2 = *BaseOps2.front();
4287 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4288 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4289 if (BaseOp1.getType() != BaseOp2.getType())
4290 return false;
4291
4292 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4293 "Only base registers and frame indices are supported.");
4294
4295 // Check for both base regs and base FI.
4296 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4297 return false;
4298
4299 // Only cluster up to a single pair.
4300 if (ClusterSize > 2)
4301 return false;
4302
4303 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4304 return false;
4305
4306 // Can we pair these instructions based on their opcodes?
4307 unsigned FirstOpc = FirstLdSt.getOpcode();
4308 unsigned SecondOpc = SecondLdSt.getOpcode();
4309 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4310 return false;
4311
4312 // Can't merge volatiles or load/stores that have a hint to avoid pair
4313 // formation, for example.
4314 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4315 !isCandidateToMergeOrPair(SecondLdSt))
4316 return false;
4317
4318 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4319 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4320 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4321 return false;
4322
4323 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4324 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4325 return false;
4326
4327 // Pairwise instructions have a 7-bit signed offset field.
4328 if (Offset1 > 63 || Offset1 < -64)
4329 return false;
4330
4331 // The caller should already have ordered First/SecondLdSt by offset.
4332 // Note: this need not hold when the bases are different frame indices.
4333 if (BaseOp1.isFI()) {
4334 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4335 "Caller should have ordered offsets.");
4336
4337 const MachineFrameInfo &MFI =
4338 FirstLdSt.getParent()->getParent()->getFrameInfo();
4339 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4340 BaseOp2.getIndex(), Offset2, SecondOpc);
4341 }
4342
4343 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4344
4345 return Offset1 + 1 == Offset2;
4346}
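// Illustrative note (added commentary, not from the original source): after
// scaling, clustering requires the first offset to fit the 7-bit signed
// LDP/STP immediate and the accesses to be adjacent, i.e. Offset1 in
// [-64, 63] and Offset2 == Offset1 + 1. Scaled offsets 10 and 11 cluster;
// 10 and 12 do not.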
4347
4349 unsigned Reg, unsigned SubIdx,
4350 unsigned State,
4351 const TargetRegisterInfo *TRI) {
4352 if (!SubIdx)
4353 return MIB.addReg(Reg, State);
4354
4356 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4357 return MIB.addReg(Reg, State, SubIdx);
4358}
4359
4360static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4361 unsigned NumRegs) {
4362 // We really want the positive remainder mod 32 here, which happens to be
4363 // easily obtainable with a mask.
4364 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4365}
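// Worked example (hypothetical encodings, not from the original source): for
// a two-register tuple copy, DestReg encoding 1 and SrcReg encoding 0 give
// ((1 - 0) & 0x1f) == 1, which is < 2, so a forward sub-register copy would
// clobber the second source register and copyPhysRegTuple below iterates in
// reverse. With DestReg encoding 3 and SrcReg encoding 0, ((3 - 0) & 0x1f)
// == 3 is not < 2, so the forward order is safe.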
4366
4369 const DebugLoc &DL, MCRegister DestReg,
4370 MCRegister SrcReg, bool KillSrc,
4371 unsigned Opcode,
4372 ArrayRef<unsigned> Indices) const {
4373 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4375 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4376 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4377 unsigned NumRegs = Indices.size();
4378
4379 int SubReg = 0, End = NumRegs, Incr = 1;
4380 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4381 SubReg = NumRegs - 1;
4382 End = -1;
4383 Incr = -1;
4384 }
4385
4386 for (; SubReg != End; SubReg += Incr) {
4387 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4388 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4389 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4390 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4391 }
4392}
4393
4396 DebugLoc DL, unsigned DestReg,
4397 unsigned SrcReg, bool KillSrc,
4398 unsigned Opcode, unsigned ZeroReg,
4399 llvm::ArrayRef<unsigned> Indices) const {
4401 unsigned NumRegs = Indices.size();
4402
4403#ifndef NDEBUG
4404 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4405 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4406 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4407 "GPR reg sequences should not be able to overlap");
4408#endif
4409
4410 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4411 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4412 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4413 MIB.addReg(ZeroReg);
4414 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4415 MIB.addImm(0);
4416 }
4417}
4418
4421 const DebugLoc &DL, MCRegister DestReg,
4422 MCRegister SrcReg, bool KillSrc) const {
4423 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4424 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4426
4427 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4428 // If either operand is WSP, expand to ADD #0.
4429 if (Subtarget.hasZeroCycleRegMove()) {
4430 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4431 MCRegister DestRegX = TRI->getMatchingSuperReg(
4432 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4433 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4434 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4435 // This instruction is reading and writing X registers. This may upset
4436 // the register scavenger and machine verifier, so we need to indicate
4437 // that we are reading an undefined value from SrcRegX, but a proper
4438 // value from SrcReg.
4439 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4440 .addReg(SrcRegX, RegState::Undef)
4441 .addImm(0)
4443 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4444 } else {
4445 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4446 .addReg(SrcReg, getKillRegState(KillSrc))
4447 .addImm(0)
4449 }
4450 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4451 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4452 .addImm(0)
4454 } else {
4455 if (Subtarget.hasZeroCycleRegMove()) {
4456 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4457 MCRegister DestRegX = TRI->getMatchingSuperReg(
4458 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4459 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4460 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4461 // This instruction is reading and writing X registers. This may upset
4462 // the register scavenger and machine verifier, so we need to indicate
4463 // that we are reading an undefined value from SrcRegX, but a proper
4464 // value from SrcReg.
4465 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4466 .addReg(AArch64::XZR)
4467 .addReg(SrcRegX, RegState::Undef)
4468 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4469 } else {
4470 // Otherwise, expand to ORR WZR.
4471 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4472 .addReg(AArch64::WZR)
4473 .addReg(SrcReg, getKillRegState(KillSrc));
4474 }
4475 }
4476 return;
4477 }
4478
4479 // Copy a Predicate register by ORRing with itself.
4480 if (AArch64::PPRRegClass.contains(DestReg) &&
4481 AArch64::PPRRegClass.contains(SrcReg)) {
4482 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4483 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4484 .addReg(SrcReg) // Pg
4485 .addReg(SrcReg)
4486 .addReg(SrcReg, getKillRegState(KillSrc));
4487 return;
4488 }
4489
4490 // Copy a predicate-as-counter register by ORRing with itself as if it
4491 // were a regular predicate (mask) register.
4492 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4493 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4494 if (DestIsPNR || SrcIsPNR) {
4495 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4496 "Unexpected predicate-as-counter register.");
4497 auto ToPPR = [](MCRegister R) -> MCRegister {
4498 return (R - AArch64::PN0) + AArch64::P0;
4499 };
4500 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4501 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4502
4503 if (PPRSrcReg != PPRDestReg) {
4504 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4505 .addReg(PPRSrcReg) // Pg
4506 .addReg(PPRSrcReg)
4507 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4508 if (DestIsPNR)
4509 NewMI.addDef(DestReg, RegState::Implicit);
4510 }
4511 return;
4512 }
4513
4514 // Copy a Z register by ORRing with itself.
4515 if (AArch64::ZPRRegClass.contains(DestReg) &&
4516 AArch64::ZPRRegClass.contains(SrcReg)) {
4517 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4518 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4519 .addReg(SrcReg)
4520 .addReg(SrcReg, getKillRegState(KillSrc));
4521 return;
4522 }
4523
4524 // Copy a Z register pair by copying the individual sub-registers.
4525 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4526 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4527 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4528 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4529 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4530 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4531 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4532 Indices);
4533 return;
4534 }
4535
4536 // Copy a Z register triple by copying the individual sub-registers.
4537 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4538 AArch64::ZPR3RegClass.contains(SrcReg)) {
4539 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4540 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4541 AArch64::zsub2};
4542 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4543 Indices);
4544 return;
4545 }
4546
4547 // Copy a Z register quad by copying the individual sub-registers.
4548 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4549 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4550 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4551 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4552 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4553 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4554 AArch64::zsub2, AArch64::zsub3};
4555 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4556 Indices);
4557 return;
4558 }
4559
4560 if (AArch64::GPR64spRegClass.contains(DestReg) &&
4561 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
4562 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
4563 // If either operand is SP, expand to ADD #0.
4564 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
4565 .addReg(SrcReg, getKillRegState(KillSrc))
4566 .addImm(0)
4568 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
4569 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
4570 .addImm(0)
4572 } else {
4573 // Otherwise, expand to ORR XZR.
4574 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
4575 .addReg(AArch64::XZR)
4576 .addReg(SrcReg, getKillRegState(KillSrc));
4577 }
4578 return;
4579 }
4580
4581 // Copy a DDDD register quad by copying the individual sub-registers.
4582 if (AArch64::DDDDRegClass.contains(DestReg) &&
4583 AArch64::DDDDRegClass.contains(SrcReg)) {
4584 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4585 AArch64::dsub2, AArch64::dsub3};
4586 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4587 Indices);
4588 return;
4589 }
4590
4591 // Copy a DDD register triple by copying the individual sub-registers.
4592 if (AArch64::DDDRegClass.contains(DestReg) &&
4593 AArch64::DDDRegClass.contains(SrcReg)) {
4594 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4595 AArch64::dsub2};
4596 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4597 Indices);
4598 return;
4599 }
4600
4601 // Copy a DD register pair by copying the individual sub-registers.
4602 if (AArch64::DDRegClass.contains(DestReg) &&
4603 AArch64::DDRegClass.contains(SrcReg)) {
4604 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
4605 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4606 Indices);
4607 return;
4608 }
4609
4610 // Copy a QQQQ register quad by copying the individual sub-registers.
4611 if (AArch64::QQQQRegClass.contains(DestReg) &&
4612 AArch64::QQQQRegClass.contains(SrcReg)) {
4613 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4614 AArch64::qsub2, AArch64::qsub3};
4615 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4616 Indices);
4617 return;
4618 }
4619
4620 // Copy a QQQ register triple by copying the individual sub-registers.
4621 if (AArch64::QQQRegClass.contains(DestReg) &&
4622 AArch64::QQQRegClass.contains(SrcReg)) {
4623 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4624 AArch64::qsub2};
4625 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4626 Indices);
4627 return;
4628 }
4629
4630 // Copy a QQ register pair by copying the individual sub-registers.
4631 if (AArch64::QQRegClass.contains(DestReg) &&
4632 AArch64::QQRegClass.contains(SrcReg)) {
4633 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
4634 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4635 Indices);
4636 return;
4637 }
4638
4639 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
4640 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
4641 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
4642 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
4643 AArch64::XZR, Indices);
4644 return;
4645 }
4646
4647 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
4648 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
4649 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
4650 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
4651 AArch64::WZR, Indices);
4652 return;
4653 }
4654
4655 if (AArch64::FPR128RegClass.contains(DestReg) &&
4656 AArch64::FPR128RegClass.contains(SrcReg)) {
4657 if (Subtarget.hasSVEorSME() && !Subtarget.isNeonAvailable())
4658 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
4659 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
4660 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
4661 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
4662 else if (Subtarget.hasNEON())
4663 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
4664 .addReg(SrcReg)
4665 .addReg(SrcReg, getKillRegState(KillSrc));
4666 else {
4667 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
4668 .addReg(AArch64::SP, RegState::Define)
4669 .addReg(SrcReg, getKillRegState(KillSrc))
4670 .addReg(AArch64::SP)
4671 .addImm(-16);
4672 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
4673 .addReg(AArch64::SP, RegState::Define)
4674 .addReg(DestReg, RegState::Define)
4675 .addReg(AArch64::SP)
4676 .addImm(16);
4677 }
4678 return;
4679 }
4680
4681 if (AArch64::FPR64RegClass.contains(DestReg) &&
4682 AArch64::FPR64RegClass.contains(SrcReg)) {
4683 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
4684 .addReg(SrcReg, getKillRegState(KillSrc));
4685 return;
4686 }
4687
4688 if (AArch64::FPR32RegClass.contains(DestReg) &&
4689 AArch64::FPR32RegClass.contains(SrcReg)) {
4690 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4691 .addReg(SrcReg, getKillRegState(KillSrc));
4692 return;
4693 }
4694
4695 if (AArch64::FPR16RegClass.contains(DestReg) &&
4696 AArch64::FPR16RegClass.contains(SrcReg)) {
4697 DestReg =
4698 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
4699 SrcReg =
4700 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
4701 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4702 .addReg(SrcReg, getKillRegState(KillSrc));
4703 return;
4704 }
4705
4706 if (AArch64::FPR8RegClass.contains(DestReg) &&
4707 AArch64::FPR8RegClass.contains(SrcReg)) {
4708 DestReg =
4709 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
4710 SrcReg =
4711 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
4712 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4713 .addReg(SrcReg, getKillRegState(KillSrc));
4714 return;
4715 }
4716
4717 // Copies between GPR64 and FPR64.
4718 if (AArch64::FPR64RegClass.contains(DestReg) &&
4719 AArch64::GPR64RegClass.contains(SrcReg)) {
4720 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
4721 .addReg(SrcReg, getKillRegState(KillSrc));
4722 return;
4723 }
4724 if (AArch64::GPR64RegClass.contains(DestReg) &&
4725 AArch64::FPR64RegClass.contains(SrcReg)) {
4726 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
4727 .addReg(SrcReg, getKillRegState(KillSrc));
4728 return;
4729 }
4730 // Copies between GPR32 and FPR32.
4731 if (AArch64::FPR32RegClass.contains(DestReg) &&
4732 AArch64::GPR32RegClass.contains(SrcReg)) {
4733 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
4734 .addReg(SrcReg, getKillRegState(KillSrc));
4735 return;
4736 }
4737 if (AArch64::GPR32RegClass.contains(DestReg) &&
4738 AArch64::FPR32RegClass.contains(SrcReg)) {
4739 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
4740 .addReg(SrcReg, getKillRegState(KillSrc));
4741 return;
4742 }
4743
4744 if (DestReg == AArch64::NZCV) {
4745 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
4746 BuildMI(MBB, I, DL, get(AArch64::MSR))
4747 .addImm(AArch64SysReg::NZCV)
4748 .addReg(SrcReg, getKillRegState(KillSrc))
4749 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
4750 return;
4751 }
4752
4753 if (SrcReg == AArch64::NZCV) {
4754 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
4755 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
4756 .addImm(AArch64SysReg::NZCV)
4757 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
4758 return;
4759 }
4760
4761#ifndef NDEBUG
4763 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
4764 << TRI.getRegAsmName(SrcReg) << "\n";
4765#endif
4766 llvm_unreachable("unimplemented reg-to-reg copy");
4767}
4768
4771 MachineBasicBlock::iterator InsertBefore,
4772 const MCInstrDesc &MCID,
4773 Register SrcReg, bool IsKill,
4774 unsigned SubIdx0, unsigned SubIdx1, int FI,
4775 MachineMemOperand *MMO) {
4776 Register SrcReg0 = SrcReg;
4777 Register SrcReg1 = SrcReg;
4778 if (SrcReg.isPhysical()) {
4779 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
4780 SubIdx0 = 0;
4781 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
4782 SubIdx1 = 0;
4783 }
4784 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4785 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
4786 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
4787 .addFrameIndex(FI)
4788 .addImm(0)
4789 .addMemOperand(MMO);
4790}
4791
4794 Register SrcReg, bool isKill, int FI,
4795 const TargetRegisterClass *RC,
4796 const TargetRegisterInfo *TRI,
4797 Register VReg) const {
4798 MachineFunction &MF = *MBB.getParent();
4799 MachineFrameInfo &MFI = MF.getFrameInfo();
4800
4802 MachineMemOperand *MMO =
4804 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4805 unsigned Opc = 0;
4806 bool Offset = true;
4808 unsigned StackID = TargetStackID::Default;
4809 switch (TRI->getSpillSize(*RC)) {
4810 case 1:
4811 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4812 Opc = AArch64::STRBui;
4813 break;
4814 case 2: {
4815 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
4816 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4817 Opc = AArch64::STRHui;
4818 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
4819 assert(Subtarget.hasSVEorSME() &&
4820 "Unexpected register store without SVE store instructions");
4821 assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4822 "Unexpected register store without SVE2p1 or SME2");
4823 Opc = AArch64::STR_PXI;
4825 }
4826 break;
4827 }
4828 case 4:
4829 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
4830 Opc = AArch64::STRWui;
4831 if (SrcReg.isVirtual())
4832 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
4833 else
4834 assert(SrcReg != AArch64::WSP);
4835 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
4836 Opc = AArch64::STRSui;
4837 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
4838 Opc = AArch64::STR_PPXI;
4840 }
4841 break;
4842 case 8:
4843 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
4844 Opc = AArch64::STRXui;
4845 if (SrcReg.isVirtual())
4846 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4847 else
4848 assert(SrcReg != AArch64::SP);
4849 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
4850 Opc = AArch64::STRDui;
4851 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
4853 get(AArch64::STPWi), SrcReg, isKill,
4854 AArch64::sube32, AArch64::subo32, FI, MMO);
4855 return;
4856 }
4857 break;
4858 case 16:
4859 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
4860 Opc = AArch64::STRQui;
4861 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
4862 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4863 Opc = AArch64::ST1Twov1d;
4864 Offset = false;
4865 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
4867 get(AArch64::STPXi), SrcReg, isKill,
4868 AArch64::sube64, AArch64::subo64, FI, MMO);
4869 return;
4870 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4871 assert(Subtarget.hasSVEorSME() &&
4872 "Unexpected register store without SVE store instructions");
4873 Opc = AArch64::STR_ZXI;
4875 }
4876 break;
4877 case 24:
4878 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4879 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4880 Opc = AArch64::ST1Threev1d;
4881 Offset = false;
4882 }
4883 break;
4884 case 32:
4885 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4886 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4887 Opc = AArch64::ST1Fourv1d;
4888 Offset = false;
4889 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4890 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4891 Opc = AArch64::ST1Twov2d;
4892 Offset = false;
4893 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4894 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4895 assert(Subtarget.hasSVEorSME() &&
4896 "Unexpected register store without SVE store instructions");
4897 Opc = AArch64::STR_ZZXI;
4899 }
4900 break;
4901 case 48:
4902 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4903 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4904 Opc = AArch64::ST1Threev2d;
4905 Offset = false;
4906 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4907 assert(Subtarget.hasSVEorSME() &&
4908 "Unexpected register store without SVE store instructions");
4909 Opc = AArch64::STR_ZZZXI;
4911 }
4912 break;
4913 case 64:
4914 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4915 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4916 Opc = AArch64::ST1Fourv2d;
4917 Offset = false;
4918 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4919 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4920 assert(Subtarget.hasSVEorSME() &&
4921 "Unexpected register store without SVE store instructions");
4922 Opc = AArch64::STR_ZZZZXI;
4924 }
4925 break;
4926 }
4927 assert(Opc && "Unknown register class");
4928 MFI.setStackID(FI, StackID);
4929
4930 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
4931 .addReg(SrcReg, getKillRegState(isKill))
4932 .addFrameIndex(FI);
4933
4934 if (Offset)
4935 MI.addImm(0);
4936 if (PNRReg.isValid())
4937 MI.addDef(PNRReg, RegState::Implicit);
4938 MI.addMemOperand(MMO);
4939}
4940
4943 MachineBasicBlock::iterator InsertBefore,
4944 const MCInstrDesc &MCID,
4945 Register DestReg, unsigned SubIdx0,
4946 unsigned SubIdx1, int FI,
4947 MachineMemOperand *MMO) {
4948 Register DestReg0 = DestReg;
4949 Register DestReg1 = DestReg;
4950 bool IsUndef = true;
4951 if (DestReg.isPhysical()) {
4952 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
4953 SubIdx0 = 0;
4954 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
4955 SubIdx1 = 0;
4956 IsUndef = false;
4957 }
4958 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4959 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
4960 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
4961 .addFrameIndex(FI)
4962 .addImm(0)
4963 .addMemOperand(MMO);
4964}
4965
4968 Register DestReg, int FI,
4969 const TargetRegisterClass *RC,
4970 const TargetRegisterInfo *TRI,
4971 Register VReg) const {
4972 MachineFunction &MF = *MBB.getParent();
4973 MachineFrameInfo &MFI = MF.getFrameInfo();
4975 MachineMemOperand *MMO =
4977 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4978
4979 unsigned Opc = 0;
4980 bool Offset = true;
4981 unsigned StackID = TargetStackID::Default;
4983 switch (TRI->getSpillSize(*RC)) {
4984 case 1:
4985 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4986 Opc = AArch64::LDRBui;
4987 break;
4988 case 2: {
4989 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
4990 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4991 Opc = AArch64::LDRHui;
4992 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
4993 assert(Subtarget.hasSVEorSME() &&
4994 "Unexpected register load without SVE load instructions");
4995 assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4996 "Unexpected register load without SVE2p1 or SME2");
4997 if (IsPNR)
4998 PNRReg = DestReg;
4999 Opc = AArch64::LDR_PXI;
5001 }
5002 break;
5003 }
5004 case 4:
5005 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5006 Opc = AArch64::LDRWui;
5007 if (DestReg.isVirtual())
5008 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5009 else
5010 assert(DestReg != AArch64::WSP);
5011 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5012 Opc = AArch64::LDRSui;
5013 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5014 Opc = AArch64::LDR_PPXI;
5016 }
5017 break;
5018 case 8:
5019 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5020 Opc = AArch64::LDRXui;
5021 if (DestReg.isVirtual())
5022 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5023 else
5024 assert(DestReg != AArch64::SP);
5025 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5026 Opc = AArch64::LDRDui;
5027 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5029 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5030 AArch64::subo32, FI, MMO);
5031 return;
5032 }
5033 break;
5034 case 16:
5035 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5036 Opc = AArch64::LDRQui;
5037 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5038 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5039 Opc = AArch64::LD1Twov1d;
5040 Offset = false;
5041 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5043 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5044 AArch64::subo64, FI, MMO);
5045 return;
5046 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5047 assert(Subtarget.hasSVEorSME() &&
5048 "Unexpected register load without SVE load instructions");
5049 Opc = AArch64::LDR_ZXI;
5051 }
5052 break;
5053 case 24:
5054 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5055 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5056 Opc = AArch64::LD1Threev1d;
5057 Offset = false;
5058 }
5059 break;
5060 case 32:
5061 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5062 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5063 Opc = AArch64::LD1Fourv1d;
5064 Offset = false;
5065 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5066 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5067 Opc = AArch64::LD1Twov2d;
5068 Offset = false;
5069 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5070 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5071 assert(Subtarget.hasSVEorSME() &&
5072 "Unexpected register load without SVE load instructions");
5073 Opc = AArch64::LDR_ZZXI;
5075 }
5076 break;
5077 case 48:
5078 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5079 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5080 Opc = AArch64::LD1Threev2d;
5081 Offset = false;
5082 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5083 assert(Subtarget.hasSVEorSME() &&
5084 "Unexpected register load without SVE load instructions");
5085 Opc = AArch64::LDR_ZZZXI;
5087 }
5088 break;
5089 case 64:
5090 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5091 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5092 Opc = AArch64::LD1Fourv2d;
5093 Offset = false;
5094 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5095 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5096 assert(Subtarget.hasSVEorSME() &&
5097 "Unexpected register load without SVE load instructions");
5098 Opc = AArch64::LDR_ZZZZXI;
5100 }
5101 break;
5102 }
5103
5104 assert(Opc && "Unknown register class");
5105 MFI.setStackID(FI, StackID);
5106
5107 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5108 .addReg(DestReg, getDefRegState(true))
5109 .addFrameIndex(FI);
5110 if (Offset)
5111 MI.addImm(0);
5112 if (PNRReg.isValid() && !PNRReg.isVirtual())
5113 MI.addDef(PNRReg, RegState::Implicit);
5114 MI.addMemOperand(MMO);
5115
5116 if (PNRReg.isValid() && PNRReg.isVirtual())
5117 BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
5118 .addReg(DestReg);
5119}
5120
5122 const MachineInstr &UseMI,
5123 const TargetRegisterInfo *TRI) {
5124 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5125 UseMI.getIterator()),
5126 [TRI](const MachineInstr &I) {
5127 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5128 I.readsRegister(AArch64::NZCV, TRI);
5129 });
5130}
5131
5133 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5134 // The smallest scalable elements supported by scaled SVE addressing
5135 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5136 // byte offset must always be a multiple of 2.
5137 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5138
5139 // VGSized offsets are divided by '2', because the VG register is the
5140 // number of 64bit granules as opposed to 128bit vector chunks,
5141 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5142 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5143 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5144 ByteSized = Offset.getFixed();
5145 VGSized = Offset.getScalable() / 2;
5146}
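// Worked example (hypothetical offset, not from the original source): a
// StackOffset of 16 fixed bytes plus 32 scalable bytes (two SVE data
// vectors) decomposes into ByteSized == 16 and VGSized == 16, i.e. the
// scalable part occupies 16 * VG bytes at run time.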
5147
5148/// Decomposes the given frame offset into the parts needed to describe it
5149/// as a frame offset (bytes, predicate vectors and data vectors).
5150/// For non-scalable offsets this is simply its byte size.
5152 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5153 int64_t &NumDataVectors) {
5154 // The smallest scalable elements supported by scaled SVE addressing
5155 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5156 // byte offset must always be a multiple of 2.
5157 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5158
5159 NumBytes = Offset.getFixed();
5160 NumDataVectors = 0;
5161 NumPredicateVectors = Offset.getScalable() / 2;
5162 // This method is used to get the offsets to adjust the frame offset.
5163 // If more than two ADDPL instructions would be required, part of the offset
5164 // is folded into NumDataVectors so that ADDVL can be used for that part,
5165 // reducing the number of ADDPL instructions.
5166 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5167 NumPredicateVectors > 62) {
5168 NumDataVectors = NumPredicateVectors / 8;
5169 NumPredicateVectors -= NumDataVectors * 8;
5170 }
5171}
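// Worked example (hypothetical offsets, not from the original source): a
// scalable offset of 144 bytes is 72 predicate-sized (2-byte) units; since
// 72 is a multiple of 8 (and also exceeds 62), it is folded into
// NumDataVectors == 9 with NumPredicateVectors == 0, so ADDVL can be used.
// A scalable offset of 6 bytes stays as NumPredicateVectors == 3 and is
// emitted with ADDPL.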
5172
5173// Convenience function to create a DWARF expression for
5174// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
5175static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5176 int NumVGScaledBytes, unsigned VG,
5177 llvm::raw_string_ostream &Comment) {
5178 uint8_t buffer[16];
5179
5180 if (NumBytes) {
5181 Expr.push_back(dwarf::DW_OP_consts);
5182 Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
5183 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5184 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5185 }
5186
5187 if (NumVGScaledBytes) {
5188 Expr.push_back((uint8_t)dwarf::DW_OP_consts);
5189 Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
5190
5191 Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
5192 Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
5193 Expr.push_back(0);
5194
5195 Expr.push_back((uint8_t)dwarf::DW_OP_mul);
5196 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5197
5198 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5199 << std::abs(NumVGScaledBytes) << " * VG";
5200 }
5201}
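// Illustrative sketch (hypothetical operands, not from the original source):
// for NumBytes == 16 and NumVGScaledBytes == 8 the appended expression is
//   DW_OP_consts 16, DW_OP_plus,
//   DW_OP_consts 8, DW_OP_bregx VG 0, DW_OP_mul, DW_OP_plus
// and the comment string gains " + 16 + 8 * VG".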
5202
5203// Creates an MCCFIInstruction:
5204// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5206 unsigned Reg,
5207 const StackOffset &Offset) {
5208 int64_t NumBytes, NumVGScaledBytes;
5210 NumVGScaledBytes);
5211 std::string CommentBuffer;
5212 llvm::raw_string_ostream Comment(CommentBuffer);
5213
5214 if (Reg == AArch64::SP)
5215 Comment << "sp";
5216 else if (Reg == AArch64::FP)
5217 Comment << "fp";
5218 else
5219 Comment << printReg(Reg, &TRI);
5220
5221 // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5222 SmallString<64> Expr;
5223 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5224 Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5225 Expr.push_back(0);
5226 appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5227 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5228
5229 // Wrap this into DW_CFA_def_cfa.
5230 SmallString<64> DefCfaExpr;
5231 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
5232 uint8_t buffer[16];
5233 DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
5234 DefCfaExpr.append(Expr.str());
5235 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
5236 Comment.str());
5237}
5238
5240 unsigned FrameReg, unsigned Reg,
5241 const StackOffset &Offset,
5242 bool LastAdjustmentWasScalable) {
5243 if (Offset.getScalable())
5244 return createDefCFAExpression(TRI, Reg, Offset);
5245
5246 if (FrameReg == Reg && !LastAdjustmentWasScalable)
5247 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
5248
5249 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5250 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
5251}
5252
5254 unsigned Reg,
5255 const StackOffset &OffsetFromDefCFA) {
5256 int64_t NumBytes, NumVGScaledBytes;
5258 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
5259
5260 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5261
5262 // Non-scalable offsets can use DW_CFA_offset directly.
5263 if (!NumVGScaledBytes)
5264 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
5265
5266 std::string CommentBuffer;
5267 llvm::raw_string_ostream Comment(CommentBuffer);
5268 Comment << printReg(Reg, &TRI) << " @ cfa";
5269
5270 // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5271 SmallString<64> OffsetExpr;
5272 appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
5273 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5274
5275 // Wrap this into DW_CFA_expression
5276 SmallString<64> CfaExpr;
5277 CfaExpr.push_back(dwarf::DW_CFA_expression);
5278 uint8_t buffer[16];
5279 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
5280 CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
5281 CfaExpr.append(OffsetExpr.str());
5282
5283 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
5284 Comment.str());
5285}
5286
5287// Helper function to emit a frame offset adjustment from a given
5288// pointer (SrcReg), stored into DestReg. This function is explicit in
5289// that the caller must supply the add/sub opcode to use.
5292 const DebugLoc &DL, unsigned DestReg,
5293 unsigned SrcReg, int64_t Offset, unsigned Opc,
5294 const TargetInstrInfo *TII,
5295 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5296 bool *HasWinCFI, bool EmitCFAOffset,
5297 StackOffset CFAOffset, unsigned FrameReg) {
5298 int Sign = 1;
5299 unsigned MaxEncoding, ShiftSize;
5300 switch (Opc) {
5301 case AArch64::ADDXri:
5302 case AArch64::ADDSXri:
5303 case AArch64::SUBXri:
5304 case AArch64::SUBSXri:
5305 MaxEncoding = 0xfff;
5306 ShiftSize = 12;
5307 break;
5308 case AArch64::ADDVL_XXI:
5309 case AArch64::ADDPL_XXI:
5310 case AArch64::ADDSVL_XXI:
5311 case AArch64::ADDSPL_XXI:
5312 MaxEncoding = 31;
5313 ShiftSize = 0;
5314 if (Offset < 0) {
5315 MaxEncoding = 32;
5316 Sign = -1;
5317 Offset = -Offset;
5318 }
5319 break;
5320 default:
5321 llvm_unreachable("Unsupported opcode");
5322 }
5323
5324 // `Offset` can be in bytes or in "scalable bytes".
5325 int VScale = 1;
5326 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
5327 VScale = 16;
5328 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
5329 VScale = 2;
5330
5331 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
5332 // scratch register. If DestReg is a virtual register, use it as the
5333 // scratch register; otherwise, create a new virtual register (to be
5334 // replaced by the scavenger at the end of PEI). That case can be optimized
5335 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
5336 // register can be loaded with offset%8 and the add/sub can use an extending
5337 // instruction with LSL#3.
5338 // Currently the function handles any offsets but generates a poor sequence
5339 // of code.
5340 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
5341
5342 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
5343 Register TmpReg = DestReg;
5344 if (TmpReg == AArch64::XZR)
5346 &AArch64::GPR64RegClass);
5347 do {
5348 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
5349 unsigned LocalShiftSize = 0;
5350 if (ThisVal > MaxEncoding) {
5351 ThisVal = ThisVal >> ShiftSize;
5352 LocalShiftSize = ShiftSize;
5353 }
5354 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
5355 "Encoding cannot handle value that big");
5356
5357 Offset -= ThisVal << LocalShiftSize;
5358 if (Offset == 0)
5359 TmpReg = DestReg;
5360 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
5361 .addReg(SrcReg)
5362 .addImm(Sign * (int)ThisVal);
5363 if (ShiftSize)
5364 MBI = MBI.addImm(
5366 MBI = MBI.setMIFlag(Flag);
5367
5368 auto Change =
5369 VScale == 1
5370 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
5371 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
5372 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
5373 CFAOffset += Change;
5374 else
5375 CFAOffset -= Change;
5376 if (EmitCFAOffset && DestReg == TmpReg) {
5377 MachineFunction &MF = *MBB.getParent();
5378 const TargetSubtargetInfo &STI = MF.getSubtarget();
5379 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
5380
5381 unsigned CFIIndex = MF.addFrameInst(
5382 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
5383 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
5384 .addCFIIndex(CFIIndex)
5385 .setMIFlags(Flag);
5386 }
5387
5388 if (NeedsWinCFI) {
5389 assert(Sign == 1 && "SEH directives should always have a positive sign");
5390 int Imm = (int)(ThisVal << LocalShiftSize);
5391 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
5392 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
5393 if (HasWinCFI)
5394 *HasWinCFI = true;
5395 if (Imm == 0)
5396 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
5397 else
5398 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
5399 .addImm(Imm)
5400 .setMIFlag(Flag);
5401 assert(Offset == 0 && "Expected remaining offset to be zero to "
5402 "emit a single SEH directive");
5403 } else if (DestReg == AArch64::SP) {
5404 if (HasWinCFI)
5405 *HasWinCFI = true;
5406 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
5407 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
5408 .addImm(Imm)
5409 .setMIFlag(Flag);
5410 }
5411 }
5412
5413 SrcReg = TmpReg;
5414 } while (Offset);
5415}
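// Worked example (hypothetical offset, not from the original source): with
// Opc == AArch64::ADDXri and Offset == 4100, the first loop iteration emits
// "add Xd, Xn, #1, lsl #12" (4096 bytes) and the second emits
// "add Xd, Xd, #4", after which Offset reaches zero.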
5416
5419 unsigned DestReg, unsigned SrcReg,
5421 MachineInstr::MIFlag Flag, bool SetNZCV,
5422 bool NeedsWinCFI, bool *HasWinCFI,
5423 bool EmitCFAOffset, StackOffset CFAOffset,
5424 unsigned FrameReg) {
5425 // If a function is marked as arm_locally_streaming, then the runtime value of
5426 // vscale in the prologue/epilogue is different from the runtime value of vscale
5427 // in the function's body. To avoid having to consider multiple vscales,
5428 // we can use `addsvl` to allocate any scalable stack-slots, which under
5429 // most circumstances will be only locals, not callee-save slots.
5430 const Function &F = MBB.getParent()->getFunction();
5431 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
5432
5433 int64_t Bytes, NumPredicateVectors, NumDataVectors;
5435 Offset, Bytes, NumPredicateVectors, NumDataVectors);
5436
5437 // First emit non-scalable frame offsets, or a simple 'mov'.
5438 if (Bytes || (!Offset && SrcReg != DestReg)) {
5439 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
5440 "SP increment/decrement not 8-byte aligned");
5441 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
5442 if (Bytes < 0) {
5443 Bytes = -Bytes;
5444 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
5445 }
5446 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
5447 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
5448 FrameReg);
5449 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
5450 ? StackOffset::getFixed(-Bytes)
5451 : StackOffset::getFixed(Bytes);
5452 SrcReg = DestReg;
5453 FrameReg = DestReg;
5454 }
5455
5456 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
5457 "SetNZCV not supported with SVE vectors");
5458 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
5459 "WinCFI not supported with SVE vectors");
5460
5461 if (NumDataVectors) {
5462 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
5463 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
5464 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5465 CFAOffset, FrameReg);
5466 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
5467 SrcReg = DestReg;
5468 }
5469
5470 if (NumPredicateVectors) {
5471 assert(DestReg != AArch64::SP && "Unaligned access to SP");
5472 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
5473 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
5474 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5475 CFAOffset, FrameReg);
5476 }
5477}
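// For illustration, a StackOffset of 32 fixed bytes plus two SVE data vectors
// is decomposed above into Bytes = 32, NumDataVectors = 2 and
// NumPredicateVectors = 0, and is lowered roughly as
//   add   sp, sp, #32
//   addvl sp, sp, #2
// (addsvl/addspl are used instead when the function carries the
// aarch64_pstate_sm_body attribute).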
5478
 5479 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
 5480 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
 5481 MachineBasicBlock::iterator InsertPt, int FrameIndex,
5482 LiveIntervals *LIS, VirtRegMap *VRM) const {
5483 // This is a bit of a hack. Consider this instruction:
5484 //
5485 // %0 = COPY %sp; GPR64all:%0
5486 //
5487 // We explicitly chose GPR64all for the virtual register so such a copy might
5488 // be eliminated by RegisterCoalescer. However, that may not be possible, and
5489 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
5490 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
5491 //
5492 // To prevent that, we are going to constrain the %0 register class here.
5493 if (MI.isFullCopy()) {
5494 Register DstReg = MI.getOperand(0).getReg();
5495 Register SrcReg = MI.getOperand(1).getReg();
5496 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
5497 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
5498 return nullptr;
5499 }
5500 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
5501 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5502 return nullptr;
5503 }
 5504 // Nothing can be folded with a copy from/to NZCV.
5505 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
5506 return nullptr;
5507 }
5508
5509 // Handle the case where a copy is being spilled or filled but the source
5510 // and destination register class don't match. For example:
5511 //
5512 // %0 = COPY %xzr; GPR64common:%0
5513 //
5514 // In this case we can still safely fold away the COPY and generate the
5515 // following spill code:
5516 //
5517 // STRXui %xzr, %stack.0
5518 //
5519 // This also eliminates spilled cross register class COPYs (e.g. between x and
5520 // d regs) of the same size. For example:
5521 //
5522 // %0 = COPY %1; GPR64:%0, FPR64:%1
5523 //
5524 // will be filled as
5525 //
5526 // LDRDui %0, fi<#0>
5527 //
5528 // instead of
5529 //
5530 // LDRXui %Temp, fi<#0>
5531 // %0 = FMOV %Temp
5532 //
5533 if (MI.isCopy() && Ops.size() == 1 &&
5534 // Make sure we're only folding the explicit COPY defs/uses.
5535 (Ops[0] == 0 || Ops[0] == 1)) {
5536 bool IsSpill = Ops[0] == 0;
5537 bool IsFill = !IsSpill;
 5538 const TargetRegisterInfo &TRI = getRegisterInfo();
 5539 const MachineRegisterInfo &MRI = MF.getRegInfo();
5540 MachineBasicBlock &MBB = *MI.getParent();
5541 const MachineOperand &DstMO = MI.getOperand(0);
5542 const MachineOperand &SrcMO = MI.getOperand(1);
5543 Register DstReg = DstMO.getReg();
5544 Register SrcReg = SrcMO.getReg();
5545 // This is slightly expensive to compute for physical regs since
5546 // getMinimalPhysRegClass is slow.
5547 auto getRegClass = [&](unsigned Reg) {
5548 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
5549 : TRI.getMinimalPhysRegClass(Reg);
5550 };
5551
5552 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
5553 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
5554 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
5555 "Mismatched register size in non subreg COPY");
5556 if (IsSpill)
5557 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
5558 getRegClass(SrcReg), &TRI, Register());
5559 else
5560 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
5561 getRegClass(DstReg), &TRI, Register());
5562 return &*--InsertPt;
5563 }
5564
5565 // Handle cases like spilling def of:
5566 //
5567 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
5568 //
5569 // where the physical register source can be widened and stored to the full
5570 // virtual reg destination stack slot, in this case producing:
5571 //
5572 // STRXui %xzr, %stack.0
5573 //
5574 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
5575 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
5576 assert(SrcMO.getSubReg() == 0 &&
5577 "Unexpected subreg on physical register");
5578 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
5579 FrameIndex, &AArch64::GPR64RegClass, &TRI,
5580 Register());
5581 return &*--InsertPt;
5582 }
5583
5584 // Handle cases like filling use of:
5585 //
5586 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
5587 //
5588 // where we can load the full virtual reg source stack slot, into the subreg
5589 // destination, in this case producing:
5590 //
5591 // LDRWui %0:sub_32<def,read-undef>, %stack.0
5592 //
5593 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
5594 const TargetRegisterClass *FillRC;
5595 switch (DstMO.getSubReg()) {
5596 default:
5597 FillRC = nullptr;
5598 break;
5599 case AArch64::sub_32:
5600 FillRC = &AArch64::GPR32RegClass;
5601 break;
5602 case AArch64::ssub:
5603 FillRC = &AArch64::FPR32RegClass;
5604 break;
5605 case AArch64::dsub:
5606 FillRC = &AArch64::FPR64RegClass;
5607 break;
5608 }
5609
5610 if (FillRC) {
5611 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
5612 TRI.getRegSizeInBits(*FillRC) &&
5613 "Mismatched regclass size on folded subreg COPY");
5614 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
5615 Register());
5616 MachineInstr &LoadMI = *--InsertPt;
5617 MachineOperand &LoadDst = LoadMI.getOperand(0);
5618 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
5619 LoadDst.setSubReg(DstMO.getSubReg());
5620 LoadDst.setIsUndef();
5621 return &LoadMI;
5622 }
5623 }
5624 }
5625
5626 // Cannot fold.
5627 return nullptr;
5628}
5629
 5630 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
 5631 StackOffset &SOffset,
5632 bool *OutUseUnscaledOp,
5633 unsigned *OutUnscaledOp,
5634 int64_t *EmittableOffset) {
5635 // Set output values in case of early exit.
5636 if (EmittableOffset)
5637 *EmittableOffset = 0;
5638 if (OutUseUnscaledOp)
5639 *OutUseUnscaledOp = false;
5640 if (OutUnscaledOp)
5641 *OutUnscaledOp = 0;
5642
5643 // Exit early for structured vector spills/fills as they can't take an
5644 // immediate offset.
5645 switch (MI.getOpcode()) {
5646 default:
5647 break;
5648 case AArch64::LD1Rv1d:
5649 case AArch64::LD1Rv2s:
5650 case AArch64::LD1Rv2d:
5651 case AArch64::LD1Rv4h:
5652 case AArch64::LD1Rv4s:
5653 case AArch64::LD1Rv8b:
5654 case AArch64::LD1Rv8h:
5655 case AArch64::LD1Rv16b:
5656 case AArch64::LD1Twov2d:
5657 case AArch64::LD1Threev2d:
5658 case AArch64::LD1Fourv2d:
5659 case AArch64::LD1Twov1d:
5660 case AArch64::LD1Threev1d:
5661 case AArch64::LD1Fourv1d:
5662 case AArch64::ST1Twov2d:
5663 case AArch64::ST1Threev2d:
5664 case AArch64::ST1Fourv2d:
5665 case AArch64::ST1Twov1d:
5666 case AArch64::ST1Threev1d:
5667 case AArch64::ST1Fourv1d:
5668 case AArch64::ST1i8:
5669 case AArch64::ST1i16:
5670 case AArch64::ST1i32:
5671 case AArch64::ST1i64:
5672 case AArch64::IRG:
5673 case AArch64::IRGstack:
5674 case AArch64::STGloop:
5675 case AArch64::STZGloop:
 5676 return AArch64FrameOffsetCannotUpdate;
 5677 }
5678
5679 // Get the min/max offset and the scale.
5680 TypeSize ScaleValue(0U, false), Width(0U, false);
5681 int64_t MinOff, MaxOff;
5682 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
5683 MaxOff))
5684 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5685
5686 // Construct the complete offset.
5687 bool IsMulVL = ScaleValue.isScalable();
5688 unsigned Scale = ScaleValue.getKnownMinValue();
5689 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
5690
5691 const MachineOperand &ImmOpnd =
5692 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
5693 Offset += ImmOpnd.getImm() * Scale;
5694
5695 // If the offset doesn't match the scale, we rewrite the instruction to
5696 // use the unscaled instruction instead. Likewise, if we have a negative
5697 // offset and there is an unscaled op to use.
 5698 std::optional<unsigned> UnscaledOp =
 5699 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
5700 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
5701 if (useUnscaledOp &&
5702 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
5703 MaxOff))
5704 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5705
5706 Scale = ScaleValue.getKnownMinValue();
5707 assert(IsMulVL == ScaleValue.isScalable() &&
5708 "Unscaled opcode has different value for scalable");
5709
5710 int64_t Remainder = Offset % Scale;
5711 assert(!(Remainder && useUnscaledOp) &&
5712 "Cannot have remainder when using unscaled op");
5713
5714 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
5715 int64_t NewOffset = Offset / Scale;
5716 if (MinOff <= NewOffset && NewOffset <= MaxOff)
5717 Offset = Remainder;
5718 else {
5719 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
5720 Offset = Offset - NewOffset * Scale;
5721 }
5722
5723 if (EmittableOffset)
5724 *EmittableOffset = NewOffset;
5725 if (OutUseUnscaledOp)
5726 *OutUseUnscaledOp = useUnscaledOp;
5727 if (OutUnscaledOp && UnscaledOp)
5728 *OutUnscaledOp = *UnscaledOp;
5729
5730 if (IsMulVL)
5731 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
5732 else
5733 SOffset = StackOffset::get(Offset, SOffset.getScalable());
 5734 return AArch64FrameOffsetCanUpdate |
 5735 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
5736}
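// For illustration, for an LDRXui (Scale = 8, immediate range [0, 4095]) a
// byte offset of 40 is emittable directly as immediate 5 with no remainder,
// while a byte offset of 35 is not a multiple of 8 and is rewritten above to
// the unscaled LDURXi form with immediate 35.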
5737
 5738 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
 5739 unsigned FrameReg, StackOffset &Offset,
5740 const AArch64InstrInfo *TII) {
5741 unsigned Opcode = MI.getOpcode();
5742 unsigned ImmIdx = FrameRegIdx + 1;
5743
5744 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
5745 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
5746 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
5747 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
5748 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
5749 MI.eraseFromParent();
5750 Offset = StackOffset();
5751 return true;
5752 }
5753
5754 int64_t NewOffset;
5755 unsigned UnscaledOp;
5756 bool UseUnscaledOp;
5757 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
5758 &UnscaledOp, &NewOffset);
 5759 if (Status & AArch64FrameOffsetCanUpdate) {
 5760 if (Status & AArch64FrameOffsetIsLegal)
 5761 // Replace the FrameIndex with FrameReg.
5762 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
5763 if (UseUnscaledOp)
5764 MI.setDesc(TII->get(UnscaledOp));
5765
5766 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
5767 return !Offset;
5768 }
5769
5770 return false;
5771}
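// For illustration, an ADDXri of a frame index that resolves to sp + 112 is
// rewritten above by re-emitting the addition through emitFrameOffset() and
// erasing the original instruction, whereas a load or store simply has its
// frame-index operand replaced by the frame register and its immediate
// updated according to isAArch64FrameOffsetLegal().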
5772
 5773 void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
 5774 MachineBasicBlock::iterator MI) const {
 5775 DebugLoc DL;
5776 BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
5777}
5778
 5779 MCInst AArch64InstrInfo::getNop() const {
 5780 return MCInstBuilder(AArch64::HINT).addImm(0);
5781}
5782
5783// AArch64 supports MachineCombiner.
5784bool AArch64InstrInfo::useMachineCombiner() const { return true; }
5785
5786// True when Opc sets flag
5787static bool isCombineInstrSettingFlag(unsigned Opc) {
5788 switch (Opc) {
5789 case AArch64::ADDSWrr:
5790 case AArch64::ADDSWri:
5791 case AArch64::ADDSXrr:
5792 case AArch64::ADDSXri:
5793 case AArch64::SUBSWrr:
5794 case AArch64::SUBSXrr:
5795 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5796 case AArch64::SUBSWri:
5797 case AArch64::SUBSXri:
5798 return true;
5799 default:
5800 break;
5801 }
5802 return false;
5803}
5804
5805// 32b Opcodes that can be combined with a MUL
5806static bool isCombineInstrCandidate32(unsigned Opc) {
5807 switch (Opc) {
5808 case AArch64::ADDWrr:
5809 case AArch64::ADDWri:
5810 case AArch64::SUBWrr:
5811 case AArch64::ADDSWrr:
5812 case AArch64::ADDSWri:
5813 case AArch64::SUBSWrr:
5814 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5815 case AArch64::SUBWri:
5816 case AArch64::SUBSWri:
5817 return true;
5818 default:
5819 break;
5820 }
5821 return false;
5822}
5823
5824// 64b Opcodes that can be combined with a MUL
5825static bool isCombineInstrCandidate64(unsigned Opc) {
5826 switch (Opc) {
5827 case AArch64::ADDXrr:
5828 case AArch64::ADDXri:
5829 case AArch64::SUBXrr:
5830 case AArch64::ADDSXrr:
5831 case AArch64::ADDSXri:
5832 case AArch64::SUBSXrr:
5833 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5834 case AArch64::SUBXri:
5835 case AArch64::SUBSXri:
5836 case AArch64::ADDv8i8:
5837 case AArch64::ADDv16i8:
5838 case AArch64::ADDv4i16:
5839 case AArch64::ADDv8i16:
5840 case AArch64::ADDv2i32:
5841 case AArch64::ADDv4i32:
5842 case AArch64::SUBv8i8:
5843 case AArch64::SUBv16i8:
5844 case AArch64::SUBv4i16:
5845 case AArch64::SUBv8i16:
5846 case AArch64::SUBv2i32:
5847 case AArch64::SUBv4i32:
5848 return true;
5849 default:
5850 break;
5851 }
5852 return false;
5853}
5854
5855// FP Opcodes that can be combined with a FMUL.
5856static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
5857 switch (Inst.getOpcode()) {
5858 default:
5859 break;
5860 case AArch64::FADDHrr:
5861 case AArch64::FADDSrr:
5862 case AArch64::FADDDrr:
5863 case AArch64::FADDv4f16:
5864 case AArch64::FADDv8f16:
5865 case AArch64::FADDv2f32:
5866 case AArch64::FADDv2f64:
5867 case AArch64::FADDv4f32:
5868 case AArch64::FSUBHrr:
5869 case AArch64::FSUBSrr:
5870 case AArch64::FSUBDrr:
5871 case AArch64::FSUBv4f16:
5872 case AArch64::FSUBv8f16:
5873 case AArch64::FSUBv2f32:
5874 case AArch64::FSUBv2f64:
5875 case AArch64::FSUBv4f32:
 5876 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
 5877 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
5878 // the target options or if FADD/FSUB has the contract fast-math flag.
5879 return Options.UnsafeFPMath ||
5880 Options.AllowFPOpFusion == FPOpFusion::Fast ||
 5881 Inst.getFlag(MachineInstr::FmContract);
 5882 return true;
5883 }
5884 return false;
5885}
5886
5887// Opcodes that can be combined with a MUL
5888static bool isCombineInstrCandidate(unsigned Opc) {
 5889 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
 5890}
5891
5892//
5893// Utility routine that checks if \param MO is defined by an
5894// \param CombineOpc instruction in the basic block \param MBB
 5895 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
 5896 unsigned CombineOpc, unsigned ZeroReg = 0,
5897 bool CheckZeroReg = false) {
 5898 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
 5899 MachineInstr *MI = nullptr;
5900
5901 if (MO.isReg() && MO.getReg().isVirtual())
5902 MI = MRI.getUniqueVRegDef(MO.getReg());
5903 // And it needs to be in the trace (otherwise, it won't have a depth).
5904 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
5905 return false;
 5906 // Must only be used by the user we combine with.
5907 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
5908 return false;
5909
5910 if (CheckZeroReg) {
5911 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
5912 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
 5913 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
5914 // The third input reg must be zero.
5915 if (MI->getOperand(3).getReg() != ZeroReg)
5916 return false;
5917 }
5918
5919 if (isCombineInstrSettingFlag(CombineOpc) &&
5920 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
5921 return false;
5922
5923 return true;
5924}
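// For illustration, given (illustrative virtual registers)
//   %3:gpr32 = MADDWrrr %1, %2, $wzr    ; MUL is an alias of MADD ..., WZR
//   %4:gpr32 = ADDWrr %3, %0
// canCombine(MBB, Add.getOperand(1), AArch64::MADDWrrr, AArch64::WZR, true)
// returns true only if %3 is defined in the same block, has no other
// non-debug use, and its addend operand is the zero register.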
5925
5926//
5927// Is \param MO defined by an integer multiply and can be combined?
 5928 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
 5929 unsigned MulOpc, unsigned ZeroReg) {
5930 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
5931}
5932
5933//
5934// Is \param MO defined by a floating-point multiply and can be combined?
 5935 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
 5936 unsigned MulOpc) {
5937 return canCombine(MBB, MO, MulOpc);
5938}
5939
5940// TODO: There are many more machine instruction opcodes to match:
5941// 1. Other data types (integer, vectors)
5942// 2. Other math / logic operations (xor, or)
5943// 3. Other forms of the same operation (intrinsics and other variants)
 5944 bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
 5945 bool Invert) const {
5946 if (Invert)
5947 return false;
5948 switch (Inst.getOpcode()) {
5949 // == Floating-point types ==
5950 // -- Floating-point instructions --
5951 case AArch64::FADDHrr:
5952 case AArch64::FADDSrr:
5953 case AArch64::FADDDrr:
5954 case AArch64::FMULHrr:
5955 case AArch64::FMULSrr:
5956 case AArch64::FMULDrr:
5957 case AArch64::FMULX16:
5958 case AArch64::FMULX32:
5959 case AArch64::FMULX64:
5960 // -- Advanced SIMD instructions --
5961 case AArch64::FADDv4f16:
5962 case AArch64::FADDv8f16:
5963 case AArch64::FADDv2f32:
5964 case AArch64::FADDv4f32:
5965 case AArch64::FADDv2f64:
5966 case AArch64::FMULv4f16:
5967 case AArch64::FMULv8f16:
5968 case AArch64::FMULv2f32:
5969 case AArch64::FMULv4f32:
5970 case AArch64::FMULv2f64:
5971 case AArch64::FMULXv4f16:
5972 case AArch64::FMULXv8f16:
5973 case AArch64::FMULXv2f32:
5974 case AArch64::FMULXv4f32:
5975 case AArch64::FMULXv2f64:
5976 // -- SVE instructions --
5977 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
5978 // in the SVE instruction set (though there are predicated ones).
5979 case AArch64::FADD_ZZZ_H:
5980 case AArch64::FADD_ZZZ_S:
5981 case AArch64::FADD_ZZZ_D:
5982 case AArch64::FMUL_ZZZ_H:
5983 case AArch64::FMUL_ZZZ_S:
5984 case AArch64::FMUL_ZZZ_D:
5985 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
 5986 (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
 5987 Inst.getFlag(MachineInstr::MIFlag::FmNsz));
 5988 
5989 // == Integer types ==
5990 // -- Base instructions --
5991 // Opcodes MULWrr and MULXrr don't exist because
5992 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
5993 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
 5994 // The machine-combiner does not support three-source-operand machine
 5995 // instructions, so we cannot reassociate MULs.
5996 case AArch64::ADDWrr:
5997 case AArch64::ADDXrr:
5998 case AArch64::ANDWrr:
5999 case AArch64::ANDXrr:
6000 case AArch64::ORRWrr:
6001 case AArch64::ORRXrr:
6002 case AArch64::EORWrr:
6003 case AArch64::EORXrr:
6004 case AArch64::EONWrr:
6005 case AArch64::EONXrr:
6006 // -- Advanced SIMD instructions --
6007 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6008 // in the Advanced SIMD instruction set.
6009 case AArch64::ADDv8i8:
6010 case AArch64::ADDv16i8:
6011 case AArch64::ADDv4i16:
6012 case AArch64::ADDv8i16:
6013 case AArch64::ADDv2i32:
6014 case AArch64::ADDv4i32:
6015 case AArch64::ADDv1i64:
6016 case AArch64::ADDv2i64:
6017 case AArch64::MULv8i8:
6018 case AArch64::MULv16i8:
6019 case AArch64::MULv4i16:
6020 case AArch64::MULv8i16:
6021 case AArch64::MULv2i32:
6022 case AArch64::MULv4i32:
6023 case AArch64::ANDv8i8:
6024 case AArch64::ANDv16i8:
6025 case AArch64::ORRv8i8:
6026 case AArch64::ORRv16i8:
6027 case AArch64::EORv8i8:
6028 case AArch64::EORv16i8:
6029 // -- SVE instructions --
6030 case AArch64::ADD_ZZZ_B:
6031 case AArch64::ADD_ZZZ_H:
6032 case AArch64::ADD_ZZZ_S:
6033 case AArch64::ADD_ZZZ_D:
6034 case AArch64::MUL_ZZZ_B:
6035 case AArch64::MUL_ZZZ_H:
6036 case AArch64::MUL_ZZZ_S:
6037 case AArch64::MUL_ZZZ_D:
6038 case AArch64::AND_ZZZ:
6039 case AArch64::ORR_ZZZ:
6040 case AArch64::EOR_ZZZ:
6041 return true;
6042
6043 default:
6044 return false;
6045 }
6046}
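// For illustration, reporting ADDXrr as associative and commutative lets the
// generic MachineCombiner rewrite a serial chain such as
//   t1 = ADDXrr a, b ; t2 = ADDXrr t1, c ; t3 = ADDXrr t2, d
// into the shallower (a + b) + (c + d), reducing the critical path from three
// dependent adds to two.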
6047
6048/// Find instructions that can be turned into madd.
 6049 static bool getMaddPatterns(MachineInstr &Root,
 6050 SmallVectorImpl<unsigned> &Patterns) {
6051 unsigned Opc = Root.getOpcode();
6052 MachineBasicBlock &MBB = *Root.getParent();
6053 bool Found = false;
6054
6055 if (!isCombineInstrCandidate(Opc))
6056 return false;
6057 if (isCombineInstrSettingFlag(Opc)) {
6058 int Cmp_NZCV =
6059 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6060 // When NZCV is live bail out.
6061 if (Cmp_NZCV == -1)
6062 return false;
6063 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6064 // When opcode can't change bail out.
6065 // CHECKME: do we miss any cases for opcode conversion?
6066 if (NewOpc == Opc)
6067 return false;
6068 Opc = NewOpc;
6069 }
6070
6071 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6072 unsigned Pattern) {
6073 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6074 Patterns.push_back(Pattern);
6075 Found = true;
6076 }
6077 };
6078
6079 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6080 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6081 Patterns.push_back(Pattern);
6082 Found = true;
6083 }
6084 };
6085
6087
6088 switch (Opc) {
6089 default:
6090 break;
6091 case AArch64::ADDWrr:
6092 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6093 "ADDWrr does not have register operands");
6094 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6095 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6096 break;
6097 case AArch64::ADDXrr:
6098 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6099 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6100 break;
6101 case AArch64::SUBWrr:
6102 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6103 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6104 break;
6105 case AArch64::SUBXrr:
6106 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6107 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6108 break;
6109 case AArch64::ADDWri:
6110 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6111 break;
6112 case AArch64::ADDXri:
6113 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6114 break;
6115 case AArch64::SUBWri:
6116 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6117 break;
6118 case AArch64::SUBXri:
6119 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6120 break;
6121 case AArch64::ADDv8i8:
6122 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6123 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6124 break;
6125 case AArch64::ADDv16i8:
6126 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6127 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6128 break;
6129 case AArch64::ADDv4i16:
6130 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6131 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6132 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6133 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6134 break;
6135 case AArch64::ADDv8i16:
6136 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6137 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6138 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6139 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6140 break;
6141 case AArch64::ADDv2i32:
6142 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6143 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6144 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6145 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6146 break;
6147 case AArch64::ADDv4i32:
6148 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6149 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6150 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6151 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6152 break;
6153 case AArch64::SUBv8i8:
6154 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6155 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6156 break;
6157 case AArch64::SUBv16i8:
6158 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6159 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6160 break;
6161 case AArch64::SUBv4i16:
6162 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6163 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6164 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6165 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6166 break;
6167 case AArch64::SUBv8i16:
6168 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6169 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6170 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6171 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6172 break;
6173 case AArch64::SUBv2i32:
6174 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6175 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6176 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6177 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6178 break;
6179 case AArch64::SUBv4i32:
6180 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6181 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6182 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6183 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
6184 break;
6185 }
6186 return Found;
6187}
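// For illustration, the MULADDW_OP1 pattern recorded above matches
//   %3:gpr32 = MADDWrrr %1, %2, $wzr    ; the 32-bit MUL
//   %4:gpr32 = ADDWrr %3, %0
// and genAlternativeCodeSequence() later folds the pair into
//   %4:gpr32 = MADDWrrr %1, %2, %0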
6188/// Floating-Point Support
6189
 6190 /// Find instructions that can be combined into a fused multiply-add/sub.
 6191 static bool getFMAPatterns(MachineInstr &Root,
 6192 SmallVectorImpl<unsigned> &Patterns) {
6193
6194 if (!isCombineInstrCandidateFP(Root))
6195 return false;
6196
6197 MachineBasicBlock &MBB = *Root.getParent();
6198 bool Found = false;
6199
6200 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
6201 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
6202 Patterns.push_back(Pattern);
6203 return true;
6204 }
6205 return false;
6206 };
6207
6209
6210 switch (Root.getOpcode()) {
6211 default:
6212 assert(false && "Unsupported FP instruction in combiner\n");
6213 break;
6214 case AArch64::FADDHrr:
6215 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6216 "FADDHrr does not have register operands");
6217
6218 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
6219 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
6220 break;
6221 case AArch64::FADDSrr:
6222 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6223 "FADDSrr does not have register operands");
6224
6225 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
6226 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
6227
6228 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
6229 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
6230 break;
6231 case AArch64::FADDDrr:
6232 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
6233 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
6234
6235 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
6236 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
6237 break;
6238 case AArch64::FADDv4f16:
6239 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
6240 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
6241
6242 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
6243 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
6244 break;
6245 case AArch64::FADDv8f16:
6246 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
6247 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
6248
6249 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
6250 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
6251 break;
6252 case AArch64::FADDv2f32:
6253 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
6254 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
6255
6256 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
6257 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
6258 break;
6259 case AArch64::FADDv2f64:
6260 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
6261 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
6262
6263 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
6264 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
6265 break;
6266 case AArch64::FADDv4f32:
6267 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
6268 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
6269
6270 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
6271 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
6272 break;
6273 case AArch64::FSUBHrr:
6274 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
6275 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
6276 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
6277 break;
6278 case AArch64::FSUBSrr:
6279 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
6280
6281 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
6282 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
6283
6284 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
6285 break;
6286 case AArch64::FSUBDrr:
6287 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
6288
6289 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
6290 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
6291
6292 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
6293 break;
6294 case AArch64::FSUBv4f16:
6295 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
6296 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
6297
6298 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
6299 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
6300 break;
6301 case AArch64::FSUBv8f16:
6302 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
6303 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
6304
6305 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
6306 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
6307 break;
6308 case AArch64::FSUBv2f32:
6309 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
6310 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
6311
6312 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
6313 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
6314 break;
6315 case AArch64::FSUBv2f64:
6316 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
6317 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
6318
6319 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
6320 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
6321 break;
6322 case AArch64::FSUBv4f32:
6323 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
6324 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
6325
6326 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
6327 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
6328 break;
6329 }
6330 return Found;
6331}
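// For illustration, the FMLAv2f32_OP2 pattern recorded above matches
//   %3:fpr64 = FMULv2f32 %1, %2
//   %4:fpr64 = FADDv2f32 %0, %3
// and is later rewritten into the single fused instruction
//   %4:fpr64 = FMLAv2f32 %0, %1, %2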
6332
 6333 static bool getFMULPatterns(MachineInstr &Root,
 6334 SmallVectorImpl<unsigned> &Patterns) {
6335 MachineBasicBlock &MBB = *Root.getParent();
6336 bool Found = false;
6337
6338 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
 6339 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
 6340 MachineOperand &MO = Root.getOperand(Operand);
6341 MachineInstr *MI = nullptr;
6342 if (MO.isReg() && MO.getReg().isVirtual())
6343 MI = MRI.getUniqueVRegDef(MO.getReg());
6344 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
6345 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
6346 MI->getOperand(1).getReg().isVirtual())
6347 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
6348 if (MI && MI->getOpcode() == Opcode) {
6349 Patterns.push_back(Pattern);
6350 return true;
6351 }
6352 return false;
6353 };
6354
6356
6357 switch (Root.getOpcode()) {
6358 default:
6359 return false;
6360 case AArch64::FMULv2f32:
6361 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
6362 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
6363 break;
6364 case AArch64::FMULv2f64:
6365 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
6366 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
6367 break;
6368 case AArch64::FMULv4f16:
6369 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
6370 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
6371 break;
6372 case AArch64::FMULv4f32:
6373 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
6374 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
6375 break;
6376 case AArch64::FMULv8f16:
6377 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
6378 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
6379 break;
6380 }
6381
6382 return Found;
6383}
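// For illustration, the FMULv2i32_indexed_OP2 pattern recorded above matches
//   %2:fpr64 = DUPv2i32lane %1:fpr128, 1
//   %3:fpr64 = FMULv2f32 %0, %2
// and genIndexedMultiply() replaces the pair with
//   %3:fpr64 = FMULv2i32_indexed %0, %1, 1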
6384
 6385 static bool getFNEGPatterns(MachineInstr &Root,
 6386 SmallVectorImpl<unsigned> &Patterns) {
6387 unsigned Opc = Root.getOpcode();
6388 MachineBasicBlock &MBB = *Root.getParent();
 6389 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
 6390 
6391 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
6392 MachineOperand &MO = Root.getOperand(1);
6393 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
6394 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
6395 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
 6396 Root.getFlag(MachineInstr::MIFlag::FmContract) &&
 6397 Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
 6398 MI->getFlag(MachineInstr::MIFlag::FmContract) &&
 6399 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
6400 Patterns.push_back(Pattern);
6401 return true;
6402 }
6403 return false;
6404 };
6405
6406 switch (Opc) {
6407 default:
6408 break;
6409 case AArch64::FNEGDr:
6410 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
6411 case AArch64::FNEGSr:
6412 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
6413 }
6414
6415 return false;
6416}
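// For illustration, the FNMADD pattern recorded above matches
//   %2:fpr64 = contract nsz FMADDDrrr %a, %b, %c
//   %3:fpr64 = contract nsz FNEGDr %2
// and genFNegatedMAD() rewrites the pair into
//   %3:fpr64 = FNMADDDrrr %a, %b, %c
// which is only sound because both instructions carry the contract and nsz
// fast-math flags.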
6417
6418/// Return true when a code sequence can improve throughput. It
6419/// should be called only for instructions in loops.
6420/// \param Pattern - combiner pattern
6422 switch (Pattern) {
6423 default:
6424 break;
6530 return true;
6531 } // end switch (Pattern)
6532 return false;
6533}
6534
6535/// Find other MI combine patterns.
 6536 static bool getMiscPatterns(MachineInstr &Root,
 6537 SmallVectorImpl<unsigned> &Patterns) {
6538 // A - (B + C) ==> (A - B) - C or (A - C) - B
6539 unsigned Opc = Root.getOpcode();
6540 MachineBasicBlock &MBB = *Root.getParent();
6541
6542 switch (Opc) {
6543 case AArch64::SUBWrr:
6544 case AArch64::SUBSWrr:
6545 case AArch64::SUBXrr:
6546 case AArch64::SUBSXrr:
6547 // Found candidate root.
6548 break;
6549 default:
6550 return false;
6551 }
6552
6553 if (isCombineInstrSettingFlag(Opc) &&
6554 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
6555 -1)
6556 return false;
6557
6558 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
6559 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
6560 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
6561 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
 6562 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
 6563 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
 6564 return true;
6565 }
6566
6567 return false;
6568}
6569
 6570 CombinerObjective
 6571 AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
 6572 switch (Pattern) {
 6573 case AArch64MachineCombinerPattern::SUBADD_OP1:
 6574 case AArch64MachineCombinerPattern::SUBADD_OP2:
 6575 return CombinerObjective::MustReduceDepth;
 6576 default:
 6577 return TargetInstrInfo::getCombinerObjective(Pattern);
 6578 }
6579}
6580
6581/// Return true when there is potentially a faster code sequence for an
6582/// instruction chain ending in \p Root. All potential patterns are listed in
6583/// the \p Pattern vector. Pattern should be sorted in priority order since the
6584/// pattern evaluator stops checking as soon as it finds a faster sequence.
6585
 6586 bool AArch64InstrInfo::getMachineCombinerPatterns(
 6587 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
6588 bool DoRegPressureReduce) const {
6589 // Integer patterns
6590 if (getMaddPatterns(Root, Patterns))
6591 return true;
6592 // Floating point patterns
6593 if (getFMULPatterns(Root, Patterns))
6594 return true;
6595 if (getFMAPatterns(Root, Patterns))
6596 return true;
6597 if (getFNEGPatterns(Root, Patterns))
6598 return true;
6599
6600 // Other patterns
6601 if (getMiscPatterns(Root, Patterns))
6602 return true;
6603
6604 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
6605 DoRegPressureReduce);
6606}
6607
 6608 enum class FMAInstKind { Default, Indexed, Accumulator };
 6609 /// genFusedMultiply - Generate fused multiply instructions.
6610/// This function supports both integer and floating point instructions.
6611/// A typical example:
6612/// F|MUL I=A,B,0
6613/// F|ADD R,I,C
6614/// ==> F|MADD R,A,B,C
6615/// \param MF Containing MachineFunction
6616/// \param MRI Register information
6617/// \param TII Target information
6618/// \param Root is the F|ADD instruction
6619/// \param [out] InsInstrs is a vector of machine instructions and will
6620/// contain the generated madd instruction
6621/// \param IdxMulOpd is index of operand in Root that is the result of
6622/// the F|MUL. In the example above IdxMulOpd is 1.
 6623 /// \param MaddOpc the opcode of the f|madd instruction
 6624 /// \param RC Register class of operands
 6625 /// \param kind the kind of FMA instruction (addressing mode) to be generated
6626/// \param ReplacedAddend is the result register from the instruction
6627/// replacing the non-combined operand, if any.
6628static MachineInstr *
 6629 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
 6630 const TargetInstrInfo *TII, MachineInstr &Root,
6631 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
6632 unsigned MaddOpc, const TargetRegisterClass *RC,
6633 FMAInstKind kind = FMAInstKind::Default,
6634 const Register *ReplacedAddend = nullptr) {
6635 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6636
6637 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
6638 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6639 Register ResultReg = Root.getOperand(0).getReg();
6640 Register SrcReg0 = MUL->getOperand(1).getReg();
6641 bool Src0IsKill = MUL->getOperand(1).isKill();
6642 Register SrcReg1 = MUL->getOperand(2).getReg();
6643 bool Src1IsKill = MUL->getOperand(2).isKill();
6644
6645 Register SrcReg2;
6646 bool Src2IsKill;
6647 if (ReplacedAddend) {
 6648 // If we just generated a new addend, we must be its only use.
6649 SrcReg2 = *ReplacedAddend;
6650 Src2IsKill = true;
6651 } else {
6652 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
6653 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
6654 }
6655
6656 if (ResultReg.isVirtual())
6657 MRI.constrainRegClass(ResultReg, RC);
6658 if (SrcReg0.isVirtual())
6659 MRI.constrainRegClass(SrcReg0, RC);
6660 if (SrcReg1.isVirtual())
6661 MRI.constrainRegClass(SrcReg1, RC);
6662 if (SrcReg2.isVirtual())
6663 MRI.constrainRegClass(SrcReg2, RC);
6664
 6665 MachineInstrBuilder MIB;
 6666 if (kind == FMAInstKind::Default)
6667 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6668 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6669 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6670 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6671 else if (kind == FMAInstKind::Indexed)
6672 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6673 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6674 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6675 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6676 .addImm(MUL->getOperand(3).getImm());
6677 else if (kind == FMAInstKind::Accumulator)
6678 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6679 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6680 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6681 .addReg(SrcReg1, getKillRegState(Src1IsKill));
6682 else
6683 assert(false && "Invalid FMA instruction kind \n");
6684 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
6685 InsInstrs.push_back(MIB);
6686 return MUL;
6687}
6688
6689static MachineInstr *
 6690 genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
 6691 const TargetInstrInfo *TII, MachineInstr &Root,
 6692 SmallVectorImpl<MachineInstr *> &InsInstrs) {
6693 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
6694
6695 unsigned Opc = 0;
6696 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
6697 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6698 Opc = AArch64::FNMADDSrrr;
6699 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
6700 Opc = AArch64::FNMADDDrrr;
6701 else
6702 return nullptr;
6703
6704 Register ResultReg = Root.getOperand(0).getReg();
6705 Register SrcReg0 = MAD->getOperand(1).getReg();
6706 Register SrcReg1 = MAD->getOperand(2).getReg();
6707 Register SrcReg2 = MAD->getOperand(3).getReg();
6708 bool Src0IsKill = MAD->getOperand(1).isKill();
6709 bool Src1IsKill = MAD->getOperand(2).isKill();
6710 bool Src2IsKill = MAD->getOperand(3).isKill();
6711 if (ResultReg.isVirtual())
6712 MRI.constrainRegClass(ResultReg, RC);
6713 if (SrcReg0.isVirtual())
6714 MRI.constrainRegClass(SrcReg0, RC);
6715 if (SrcReg1.isVirtual())
6716 MRI.constrainRegClass(SrcReg1, RC);
6717 if (SrcReg2.isVirtual())
6718 MRI.constrainRegClass(SrcReg2, RC);
6719
 6720 MachineInstrBuilder MIB =
 6721 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
6722 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6723 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6724 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6725 InsInstrs.push_back(MIB);
6726
6727 return MAD;
6728}
6729
6730/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
6731static MachineInstr *
 6732 genIndexedMultiply(MachineInstr &Root,
 6733 SmallVectorImpl<MachineInstr *> &InsInstrs,
 6734 unsigned IdxDupOp, unsigned MulOpc,
 6735 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
6736 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
6737 "Invalid index of FMUL operand");
6738
6739 MachineFunction &MF = *Root.getMF();
 6740 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
 6741 
6742 MachineInstr *Dup =
6743 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
6744
6745 if (Dup->getOpcode() == TargetOpcode::COPY)
6746 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
6747
6748 Register DupSrcReg = Dup->getOperand(1).getReg();
6749 MRI.clearKillFlags(DupSrcReg);
6750 MRI.constrainRegClass(DupSrcReg, RC);
6751
6752 unsigned DupSrcLane = Dup->getOperand(2).getImm();
6753
6754 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
6755 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
6756
6757 Register ResultReg = Root.getOperand(0).getReg();
6758
 6759 MachineInstrBuilder MIB;
 6760 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
6761 .add(MulOp)
6762 .addReg(DupSrcReg)
6763 .addImm(DupSrcLane);
6764
6765 InsInstrs.push_back(MIB);
6766 return &Root;
6767}
6768
6769/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
6770/// instructions.
6771///
6772/// \see genFusedMultiply
 6773 static MachineInstr *genFusedMultiplyAcc(
 6774 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
 6775 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
 6776 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6777 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6778 FMAInstKind::Accumulator);
6779}
6780
6781/// genNeg - Helper to generate an intermediate negation of the second operand
6782/// of Root
 6783 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
 6784 const TargetInstrInfo *TII, MachineInstr &Root,
 6785 SmallVectorImpl<MachineInstr *> &InsInstrs,
 6786 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
6787 unsigned MnegOpc, const TargetRegisterClass *RC) {
6788 Register NewVR = MRI.createVirtualRegister(RC);
 6789 MachineInstrBuilder MIB =
 6790 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
6791 .add(Root.getOperand(2));
6792 InsInstrs.push_back(MIB);
6793
6794 assert(InstrIdxForVirtReg.empty());
6795 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6796
6797 return NewVR;
6798}
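// For illustration, for the MULSUBv8i8_OP1 pattern (res = mul(a, b) - acc)
// genNeg() first emits
//   %neg:fpr64 = NEGv8i8 %acc
// and genFusedMultiplyAccNeg() then completes the rewrite with
//   %res:fpr64 = MLAv8i8 %neg, %a, %b    ; -acc + a*b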
6799
6800/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
6801/// instructions with an additional negation of the accumulator
 6802 static MachineInstr *genFusedMultiplyAccNeg(
 6803 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
 6804 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
 6805 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6806 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6807 assert(IdxMulOpd == 1);
6808
6809 Register NewVR =
6810 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6811 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6812 FMAInstKind::Accumulator, &NewVR);
6813}
6814
6815/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
6816/// instructions.
6817///
6818/// \see genFusedMultiply
 6819 static MachineInstr *genFusedMultiplyIdx(
 6820 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
 6821 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
 6822 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6823 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6824 FMAInstKind::Indexed);
6825}
6826
 6827 /// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
6828/// instructions with an additional negation of the accumulator
 6829 static MachineInstr *genFusedMultiplyIdxNeg(
 6830 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
 6831 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
 6832 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6833 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6834 assert(IdxMulOpd == 1);
6835
6836 Register NewVR =
6837 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6838
6839 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6840 FMAInstKind::Indexed, &NewVR);
6841}
6842
6843/// genMaddR - Generate madd instruction and combine mul and add using
6844/// an extra virtual register
6845/// Example - an ADD intermediate needs to be stored in a register:
6846/// MUL I=A,B,0
6847/// ADD R,I,Imm
6848/// ==> ORR V, ZR, Imm
6849/// ==> MADD R,A,B,V
6850/// \param MF Containing MachineFunction
6851/// \param MRI Register information
6852/// \param TII Target information
6853/// \param Root is the ADD instruction
6854/// \param [out] InsInstrs is a vector of machine instructions and will
6855/// contain the generated madd instruction
6856/// \param IdxMulOpd is index of operand in Root that is the result of
6857/// the MUL. In the example above IdxMulOpd is 1.
 6858 /// \param MaddOpc the opcode of the madd instruction
6859/// \param VR is a virtual register that holds the value of an ADD operand
6860/// (V in the example above).
6861/// \param RC Register class of operands
 6862 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
 6863 const TargetInstrInfo *TII, MachineInstr &Root,
 6864 SmallVectorImpl<MachineInstr *> &InsInstrs,
 6865 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
6866 const TargetRegisterClass *RC) {
6867 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6868
6869 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6870 Register ResultReg = Root.getOperand(0).getReg();
6871 Register SrcReg0 = MUL->getOperand(1).getReg();
6872 bool Src0IsKill = MUL->getOperand(1).isKill();
6873 Register SrcReg1 = MUL->getOperand(2).getReg();
6874 bool Src1IsKill = MUL->getOperand(2).isKill();
6875
6876 if (ResultReg.isVirtual())
6877 MRI.constrainRegClass(ResultReg, RC);
6878 if (SrcReg0.isVirtual())
6879 MRI.constrainRegClass(SrcReg0, RC);
6880 if (SrcReg1.isVirtual())
6881 MRI.constrainRegClass(SrcReg1, RC);
 6882 if (Register::isVirtualRegister(VR))
 6883 MRI.constrainRegClass(VR, RC);
6884
 6885 MachineInstrBuilder MIB =
 6886 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6887 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6888 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6889 .addReg(VR);
6890 // Insert the MADD
6891 InsInstrs.push_back(MIB);
6892 return MUL;
6893}
6894
6895/// Do the following transformation
6896/// A - (B + C) ==> (A - B) - C
6897/// A - (B + C) ==> (A - C) - B
6898static void
 6899 genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
 6900 const TargetInstrInfo *TII, MachineInstr &Root,
 6901 SmallVectorImpl<MachineInstr *> &InsInstrs,
 6902 SmallVectorImpl<MachineInstr *> &DelInstrs,
 6903 unsigned IdxOpd1,
6904 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
6905 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
6906 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
6907 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
6908
6909 Register ResultReg = Root.getOperand(0).getReg();
6910 Register RegA = Root.getOperand(1).getReg();
6911 bool RegAIsKill = Root.getOperand(1).isKill();
6912 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
6913 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
6914 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
6915 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
6916 Register NewVR = MRI.createVirtualRegister(MRI.getRegClass(RegA));
6917
6918 unsigned Opcode = Root.getOpcode();
6919 if (Opcode == AArch64::SUBSWrr)
6920 Opcode = AArch64::SUBWrr;
6921 else if (Opcode == AArch64::SUBSXrr)
6922 Opcode = AArch64::SUBXrr;
6923 else
6924 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
6925 "Unexpected instruction opcode.");
6926
6927 MachineInstrBuilder MIB1 =
6928 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
6929 .addReg(RegA, getKillRegState(RegAIsKill))
6930 .addReg(RegB, getKillRegState(RegBIsKill));
6931 MachineInstrBuilder MIB2 =
6932 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
6933 .addReg(NewVR, getKillRegState(true))
6934 .addReg(RegC, getKillRegState(RegCIsKill));
6935
6936 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6937 InsInstrs.push_back(MIB1);
6938 InsInstrs.push_back(MIB2);
6939 DelInstrs.push_back(AddMI);
6940}
6941
6942/// When getMachineCombinerPatterns() finds potential patterns,
6943/// this function generates the instructions that could replace the
6944/// original code sequence
 6945 void AArch64InstrInfo::genAlternativeCodeSequence(
 6946 MachineInstr &Root, unsigned Pattern,
 6947 SmallVectorImpl<MachineInstr *> &InsInstrs,
 6948 SmallVectorImpl<MachineInstr *> &DelInstrs,
 6949 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
 6950 MachineBasicBlock &MBB = *Root.getParent();
 6951 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
 6952 MachineFunction &MF = *MBB.getParent();
 6953 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6954
6955 MachineInstr *MUL = nullptr;
6956 const TargetRegisterClass *RC;
6957 unsigned Opc;
6958 switch (Pattern) {
6959 default:
6960 // Reassociate instructions.
 6961 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
 6962 DelInstrs, InstrIdxForVirtReg);
6963 return;
 6964 case AArch64MachineCombinerPattern::SUBADD_OP1:
 6965 // A - (B + C)
6966 // ==> (A - B) - C
6967 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
6968 InstrIdxForVirtReg);
6969 break;
 6970 case AArch64MachineCombinerPattern::SUBADD_OP2:
 6971 // A - (B + C)
6972 // ==> (A - C) - B
6973 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
6974 InstrIdxForVirtReg);
6975 break;
 6976 case AArch64MachineCombinerPattern::MULADDW_OP1:
 6977 case AArch64MachineCombinerPattern::MULADDX_OP1:
 6978 // MUL I=A,B,0
6979 // ADD R,I,C
6980 // ==> MADD R,A,B,C
6981 // --- Create(MADD);
 6982 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
 6983 Opc = AArch64::MADDWrrr;
6984 RC = &AArch64::GPR32RegClass;
6985 } else {
6986 Opc = AArch64::MADDXrrr;
6987 RC = &AArch64::GPR64RegClass;
6988 }
6989 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
6990 break;
6993 // MUL I=A,B,0
6994 // ADD R,C,I
6995 // ==> MADD R,A,B,C
6996 // --- Create(MADD);
6998 Opc = AArch64::MADDWrrr;
6999 RC = &AArch64::GPR32RegClass;
7000 } else {
7001 Opc = AArch64::MADDXrrr;
7002 RC = &AArch64::GPR64RegClass;
7003 }
7004 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7005 break;
7008 // MUL I=A,B,0
7009 // ADD R,I,Imm
7010 // ==> MOV V, Imm
7011 // ==> MADD R,A,B,V
7012 // --- Create(MADD);
7013 const TargetRegisterClass *OrrRC;
7014 unsigned BitSize, OrrOpc, ZeroReg;
7016 OrrOpc = AArch64::ORRWri;
7017 OrrRC = &AArch64::GPR32spRegClass;
7018 BitSize = 32;
7019 ZeroReg = AArch64::WZR;
7020 Opc = AArch64::MADDWrrr;
7021 RC = &AArch64::GPR32RegClass;
7022 } else {
7023 OrrOpc = AArch64::ORRXri;
7024 OrrRC = &AArch64::GPR64spRegClass;
7025 BitSize = 64;
7026 ZeroReg = AArch64::XZR;
7027 Opc = AArch64::MADDXrrr;
7028 RC = &AArch64::GPR64RegClass;
7029 }
7030 Register NewVR = MRI.createVirtualRegister(OrrRC);
7031 uint64_t Imm = Root.getOperand(2).getImm();
7032
7033 if (Root.getOperand(3).isImm()) {
7034 unsigned Val = Root.getOperand(3).getImm();
7035 Imm = Imm << Val;
7036 }
7037 uint64_t UImm = SignExtend64(Imm, BitSize);
7038 // The immediate can be composed via a single instruction.
7040 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7041 if (Insn.size() != 1)
7042 return;
7043 auto MovI = Insn.begin();
7045 // MOV is an alias for one of three instructions: movz, movn, and orr.
7046 if (MovI->Opcode == OrrOpc)
7047 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7048 .addReg(ZeroReg)
7049 .addImm(MovI->Op2);
7050 else {
7051 if (BitSize == 32)
7052 assert((MovI->Opcode == AArch64::MOVNWi ||
7053 MovI->Opcode == AArch64::MOVZWi) &&
7054 "Expected opcode");
7055 else
7056 assert((MovI->Opcode == AArch64::MOVNXi ||
7057 MovI->Opcode == AArch64::MOVZXi) &&
7058 "Expected opcode");
7059 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7060 .addImm(MovI->Op1)
7061 .addImm(MovI->Op2);
7062 }
7063 InsInstrs.push_back(MIB1);
7064 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7065 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7066 break;
7067 }
7070 // MUL I=A,B,0
7071 // SUB R,I, C
7072 // ==> SUB V, 0, C
7073 // ==> MADD R,A,B,V // = -C + A*B
7074 // --- Create(MADD);
7075 const TargetRegisterClass *SubRC;
7076 unsigned SubOpc, ZeroReg;
7078 SubOpc = AArch64::SUBWrr;
7079 SubRC = &AArch64::GPR32spRegClass;
7080 ZeroReg = AArch64::WZR;
7081 Opc = AArch64::MADDWrrr;
7082 RC = &AArch64::GPR32RegClass;
7083 } else {
7084 SubOpc = AArch64::SUBXrr;
7085 SubRC = &AArch64::GPR64spRegClass;
7086 ZeroReg = AArch64::XZR;
7087 Opc = AArch64::MADDXrrr;
7088 RC = &AArch64::GPR64RegClass;
7089 }
7090 Register NewVR = MRI.createVirtualRegister(SubRC);
7091 // SUB NewVR, 0, C
7092 MachineInstrBuilder MIB1 =
7093 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
7094 .addReg(ZeroReg)
7095 .add(Root.getOperand(2));
7096 InsInstrs.push_back(MIB1);
7097 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7098 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7099 break;
7100 }
7103 // MUL I=A,B,0
7104 // SUB R,C,I
7105 // ==> MSUB R,A,B,C (computes C - A*B)
7106 // --- Create(MSUB);
7108 Opc = AArch64::MSUBWrrr;
7109 RC = &AArch64::GPR32RegClass;
7110 } else {
7111 Opc = AArch64::MSUBXrrr;
7112 RC = &AArch64::GPR64RegClass;
7113 }
7114 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7115 break;
7118 // MUL I=A,B,0
7119 // SUB R,I, Imm
7120 // ==> MOV V, -Imm
7121 // ==> MADD R,A,B,V // = -Imm + A*B
7122 // --- Create(MADD);
7123 const TargetRegisterClass *OrrRC;
7124 unsigned BitSize, OrrOpc, ZeroReg;
7126 OrrOpc = AArch64::ORRWri;
7127 OrrRC = &AArch64::GPR32spRegClass;
7128 BitSize = 32;
7129 ZeroReg = AArch64::WZR;
7130 Opc = AArch64::MADDWrrr;
7131 RC = &AArch64::GPR32RegClass;
7132 } else {
7133 OrrOpc = AArch64::ORRXri;
7134 OrrRC = &AArch64::GPR64spRegClass;
7135 BitSize = 64;
7136 ZeroReg = AArch64::XZR;
7137 Opc = AArch64::MADDXrrr;
7138 RC = &AArch64::GPR64RegClass;
7139 }
7140 Register NewVR = MRI.createVirtualRegister(OrrRC);
7141 uint64_t Imm = Root.getOperand(2).getImm();
7142 if (Root.getOperand(3).isImm()) {
7143 unsigned Val = Root.getOperand(3).getImm();
7144 Imm = Imm << Val;
7145 }
7146 uint64_t UImm = SignExtend64(-Imm, BitSize);
7147 // The immediate can be composed via a single instruction.
7149 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7150 if (Insn.size() != 1)
7151 return;
7152 auto MovI = Insn.begin();
7154 // MOV is an alias for one of three instructions: movz, movn, and orr.
7155 if (MovI->Opcode == OrrOpc)
7156 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7157 .addReg(ZeroReg)
7158 .addImm(MovI->Op2);
7159 else {
7160 if (BitSize == 32)
7161 assert((MovI->Opcode == AArch64::MOVNWi ||
7162 MovI->Opcode == AArch64::MOVZWi) &&
7163 "Expected opcode");
7164 else
7165 assert((MovI->Opcode == AArch64::MOVNXi ||
7166 MovI->Opcode == AArch64::MOVZXi) &&
7167 "Expected opcode");
7168 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7169 .addImm(MovI->Op1)
7170 .addImm(MovI->Op2);
7171 }
7172 InsInstrs.push_back(MIB1);
7173 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7174 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7175 break;
7176 }
7177
7179 Opc = AArch64::MLAv8i8;
7180 RC = &AArch64::FPR64RegClass;
7181 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7182 break;
7184 Opc = AArch64::MLAv8i8;
7185 RC = &AArch64::FPR64RegClass;
7186 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7187 break;
7189 Opc = AArch64::MLAv16i8;
7190 RC = &AArch64::FPR128RegClass;
7191 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7192 break;
7194 Opc = AArch64::MLAv16i8;
7195 RC = &AArch64::FPR128RegClass;
7196 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7197 break;
7199 Opc = AArch64::MLAv4i16;
7200 RC = &AArch64::FPR64RegClass;
7201 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7202 break;
7204 Opc = AArch64::MLAv4i16;
7205 RC = &AArch64::FPR64RegClass;
7206 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7207 break;
7209 Opc = AArch64::MLAv8i16;
7210 RC = &AArch64::FPR128RegClass;
7211 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7212 break;
7214 Opc = AArch64::MLAv8i16;
7215 RC = &AArch64::FPR128RegClass;
7216 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7217 break;
7219 Opc = AArch64::MLAv2i32;
7220 RC = &AArch64::FPR64RegClass;
7221 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7222 break;
7224 Opc = AArch64::MLAv2i32;
7225 RC = &AArch64::FPR64RegClass;
7226 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7227 break;
7229 Opc = AArch64::MLAv4i32;
7230 RC = &AArch64::FPR128RegClass;
7231 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7232 break;
7234 Opc = AArch64::MLAv4i32;
7235 RC = &AArch64::FPR128RegClass;
7236 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7237 break;
7238
7240 Opc = AArch64::MLAv8i8;
7241 RC = &AArch64::FPR64RegClass;
7242 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7243 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
7244 RC);
7245 break;
7247 Opc = AArch64::MLSv8i8;
7248 RC = &AArch64::FPR64RegClass;
7249 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7250 break;
7252 Opc = AArch64::MLAv16i8;
7253 RC = &AArch64::FPR128RegClass;
7254 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7255 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
7256 RC);
7257 break;
7259 Opc = AArch64::MLSv16i8;
7260 RC = &AArch64::FPR128RegClass;
7261 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7262 break;
7264 Opc = AArch64::MLAv4i16;
7265 RC = &AArch64::FPR64RegClass;
7266 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7267 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7268 RC);
7269 break;
7271 Opc = AArch64::MLSv4i16;
7272 RC = &AArch64::FPR64RegClass;
7273 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7274 break;
7276 Opc = AArch64::MLAv8i16;
7277 RC = &AArch64::FPR128RegClass;
7278 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7279 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7280 RC);
7281 break;
7283 Opc = AArch64::MLSv8i16;
7284 RC = &AArch64::FPR128RegClass;
7285 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7286 break;
7288 Opc = AArch64::MLAv2i32;
7289 RC = &AArch64::FPR64RegClass;
7290 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7291 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7292 RC);
7293 break;
7295 Opc = AArch64::MLSv2i32;
7296 RC = &AArch64::FPR64RegClass;
7297 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7298 break;
7300 Opc = AArch64::MLAv4i32;
7301 RC = &AArch64::FPR128RegClass;
7302 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7303 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7304 RC);
7305 break;
7307 Opc = AArch64::MLSv4i32;
7308 RC = &AArch64::FPR128RegClass;
7309 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7310 break;
7311
7313 Opc = AArch64::MLAv4i16_indexed;
7314 RC = &AArch64::FPR64RegClass;
7315 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7316 break;
7318 Opc = AArch64::MLAv4i16_indexed;
7319 RC = &AArch64::FPR64RegClass;
7320 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7321 break;
7323 Opc = AArch64::MLAv8i16_indexed;
7324 RC = &AArch64::FPR128RegClass;
7325 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7326 break;
7328 Opc = AArch64::MLAv8i16_indexed;
7329 RC = &AArch64::FPR128RegClass;
7330 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7331 break;
7333 Opc = AArch64::MLAv2i32_indexed;
7334 RC = &AArch64::FPR64RegClass;
7335 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7336 break;
7338 Opc = AArch64::MLAv2i32_indexed;
7339 RC = &AArch64::FPR64RegClass;
7340 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7341 break;
7343 Opc = AArch64::MLAv4i32_indexed;
7344 RC = &AArch64::FPR128RegClass;
7345 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7346 break;
7348 Opc = AArch64::MLAv4i32_indexed;
7349 RC = &AArch64::FPR128RegClass;
7350 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7351 break;
7352
7354 Opc = AArch64::MLAv4i16_indexed;
7355 RC = &AArch64::FPR64RegClass;
7356 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7357 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7358 RC);
7359 break;
7361 Opc = AArch64::MLSv4i16_indexed;
7362 RC = &AArch64::FPR64RegClass;
7363 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7364 break;
7366 Opc = AArch64::MLAv8i16_indexed;
7367 RC = &AArch64::FPR128RegClass;
7368 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7369 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7370 RC);
7371 break;
7373 Opc = AArch64::MLSv8i16_indexed;
7374 RC = &AArch64::FPR128RegClass;
7375 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7376 break;
7378 Opc = AArch64::MLAv2i32_indexed;
7379 RC = &AArch64::FPR64RegClass;
7380 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7381 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7382 RC);
7383 break;
7385 Opc = AArch64::MLSv2i32_indexed;
7386 RC = &AArch64::FPR64RegClass;
7387 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7388 break;
7390 Opc = AArch64::MLAv4i32_indexed;
7391 RC = &AArch64::FPR128RegClass;
7392 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7393 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7394 RC);
7395 break;
7397 Opc = AArch64::MLSv4i32_indexed;
7398 RC = &AArch64::FPR128RegClass;
7399 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7400 break;
7401
7402 // Floating Point Support
7404 Opc = AArch64::FMADDHrrr;
7405 RC = &AArch64::FPR16RegClass;
7406 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7407 break;
7409 Opc = AArch64::FMADDSrrr;
7410 RC = &AArch64::FPR32RegClass;
7411 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7412 break;
7414 Opc = AArch64::FMADDDrrr;
7415 RC = &AArch64::FPR64RegClass;
7416 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7417 break;
7418
7420 Opc = AArch64::FMADDHrrr;
7421 RC = &AArch64::FPR16RegClass;
7422 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7423 break;
7425 Opc = AArch64::FMADDSrrr;
7426 RC = &AArch64::FPR32RegClass;
7427 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7428 break;
7430 Opc = AArch64::FMADDDrrr;
7431 RC = &AArch64::FPR64RegClass;
7432 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7433 break;
7434
7436 Opc = AArch64::FMLAv1i32_indexed;
7437 RC = &AArch64::FPR32RegClass;
7438 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7439 FMAInstKind::Indexed);
7440 break;
7442 Opc = AArch64::FMLAv1i32_indexed;
7443 RC = &AArch64::FPR32RegClass;
7444 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7445 FMAInstKind::Indexed);
7446 break;
7447
7449 Opc = AArch64::FMLAv1i64_indexed;
7450 RC = &AArch64::FPR64RegClass;
7451 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7452 FMAInstKind::Indexed);
7453 break;
7455 Opc = AArch64::FMLAv1i64_indexed;
7456 RC = &AArch64::FPR64RegClass;
7457 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7458 FMAInstKind::Indexed);
7459 break;
7460
7462 RC = &AArch64::FPR64RegClass;
7463 Opc = AArch64::FMLAv4i16_indexed;
7464 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7465 FMAInstKind::Indexed);
7466 break;
7468 RC = &AArch64::FPR64RegClass;
7469 Opc = AArch64::FMLAv4f16;
7470 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7471 FMAInstKind::Accumulator);
7472 break;
7474 RC = &AArch64::FPR64RegClass;
7475 Opc = AArch64::FMLAv4i16_indexed;
7476 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7477 FMAInstKind::Indexed);
7478 break;
7480 RC = &AArch64::FPR64RegClass;
7481 Opc = AArch64::FMLAv4f16;
7482 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7483 FMAInstKind::Accumulator);
7484 break;
7485
7488 RC = &AArch64::FPR64RegClass;
7490 Opc = AArch64::FMLAv2i32_indexed;
7491 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7492 FMAInstKind::Indexed);
7493 } else {
7494 Opc = AArch64::FMLAv2f32;
7495 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7496 FMAInstKind::Accumulator);
7497 }
7498 break;
7501 RC = &AArch64::FPR64RegClass;
7503 Opc = AArch64::FMLAv2i32_indexed;
7504 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7505 FMAInstKind::Indexed);
7506 } else {
7507 Opc = AArch64::FMLAv2f32;
7508 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7509 FMAInstKind::Accumulator);
7510 }
7511 break;
7512
7514 RC = &AArch64::FPR128RegClass;
7515 Opc = AArch64::FMLAv8i16_indexed;
7516 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7517 FMAInstKind::Indexed);
7518 break;
7520 RC = &AArch64::FPR128RegClass;
7521 Opc = AArch64::FMLAv8f16;
7522 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7523 FMAInstKind::Accumulator);
7524 break;
7526 RC = &AArch64::FPR128RegClass;
7527 Opc = AArch64::FMLAv8i16_indexed;
7528 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7529 FMAInstKind::Indexed);
7530 break;
7532 RC = &AArch64::FPR128RegClass;
7533 Opc = AArch64::FMLAv8f16;
7534 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7535 FMAInstKind::Accumulator);
7536 break;
7537
7540 RC = &AArch64::FPR128RegClass;
7542 Opc = AArch64::FMLAv2i64_indexed;
7543 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7544 FMAInstKind::Indexed);
7545 } else {
7546 Opc = AArch64::FMLAv2f64;
7547 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7548 FMAInstKind::Accumulator);
7549 }
7550 break;
7553 RC = &AArch64::FPR128RegClass;
7555 Opc = AArch64::FMLAv2i64_indexed;
7556 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7557 FMAInstKind::Indexed);
7558 } else {
7559 Opc = AArch64::FMLAv2f64;
7560 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7561 FMAInstKind::Accumulator);
7562 }
7563 break;
7564
7567 RC = &AArch64::FPR128RegClass;
7569 Opc = AArch64::FMLAv4i32_indexed;
7570 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7571 FMAInstKind::Indexed);
7572 } else {
7573 Opc = AArch64::FMLAv4f32;
7574 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7575 FMAInstKind::Accumulator);
7576 }
7577 break;
7578
7581 RC = &AArch64::FPR128RegClass;
7583 Opc = AArch64::FMLAv4i32_indexed;
7584 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7585 FMAInstKind::Indexed);
7586 } else {
7587 Opc = AArch64::FMLAv4f32;
7588 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7589 FMAInstKind::Accumulator);
7590 }
7591 break;
7592
7594 Opc = AArch64::FNMSUBHrrr;
7595 RC = &AArch64::FPR16RegClass;
7596 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7597 break;
7599 Opc = AArch64::FNMSUBSrrr;
7600 RC = &AArch64::FPR32RegClass;
7601 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7602 break;
7604 Opc = AArch64::FNMSUBDrrr;
7605 RC = &AArch64::FPR64RegClass;
7606 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7607 break;
7608
7610 Opc = AArch64::FNMADDHrrr;
7611 RC = &AArch64::FPR16RegClass;
7612 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7613 break;
7615 Opc = AArch64::FNMADDSrrr;
7616 RC = &AArch64::FPR32RegClass;
7617 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7618 break;
7620 Opc = AArch64::FNMADDDrrr;
7621 RC = &AArch64::FPR64RegClass;
7622 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7623 break;
7624
7626 Opc = AArch64::FMSUBHrrr;
7627 RC = &AArch64::FPR16RegClass;
7628 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7629 break;
7631 Opc = AArch64::FMSUBSrrr;
7632 RC = &AArch64::FPR32RegClass;
7633 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7634 break;
7636 Opc = AArch64::FMSUBDrrr;
7637 RC = &AArch64::FPR64RegClass;
7638 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7639 break;
7640
7642 Opc = AArch64::FMLSv1i32_indexed;
7643 RC = &AArch64::FPR32RegClass;
7644 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7645 FMAInstKind::Indexed);
7646 break;
7647
7649 Opc = AArch64::FMLSv1i64_indexed;
7650 RC = &AArch64::FPR64RegClass;
7651 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7652 FMAInstKind::Indexed);
7653 break;
7654
7657 RC = &AArch64::FPR64RegClass;
7658 Register NewVR = MRI.createVirtualRegister(RC);
7659 MachineInstrBuilder MIB1 =
7660 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
7661 .add(Root.getOperand(2));
7662 InsInstrs.push_back(MIB1);
7663 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7665 Opc = AArch64::FMLAv4f16;
7666 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7667 FMAInstKind::Accumulator, &NewVR);
7668 } else {
7669 Opc = AArch64::FMLAv4i16_indexed;
7670 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7671 FMAInstKind::Indexed, &NewVR);
7672 }
7673 break;
7674 }
7676 RC = &AArch64::FPR64RegClass;
7677 Opc = AArch64::FMLSv4f16;
7678 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7679 FMAInstKind::Accumulator);
7680 break;
7682 RC = &AArch64::FPR64RegClass;
7683 Opc = AArch64::FMLSv4i16_indexed;
7684 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7685 FMAInstKind::Indexed);
7686 break;
7687
7690 RC = &AArch64::FPR64RegClass;
7692 Opc = AArch64::FMLSv2i32_indexed;
7693 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7694 FMAInstKind::Indexed);
7695 } else {
7696 Opc = AArch64::FMLSv2f32;
7697 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7698 FMAInstKind::Accumulator);
7699 }
7700 break;
7701
7704 RC = &AArch64::FPR128RegClass;
7705 Register NewVR = MRI.createVirtualRegister(RC);
7706 MachineInstrBuilder MIB1 =
7707 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
7708 .add(Root.getOperand(2));
7709 InsInstrs.push_back(MIB1);
7710 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7712 Opc = AArch64::FMLAv8f16;
7713 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7714 FMAInstKind::Accumulator, &NewVR);
7715 } else {
7716 Opc = AArch64::FMLAv8i16_indexed;
7717 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7718 FMAInstKind::Indexed, &NewVR);
7719 }
7720 break;
7721 }
7723 RC = &AArch64::FPR128RegClass;
7724 Opc = AArch64::FMLSv8f16;
7725 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7726 FMAInstKind::Accumulator);
7727 break;
7729 RC = &AArch64::FPR128RegClass;
7730 Opc = AArch64::FMLSv8i16_indexed;
7731 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7732 FMAInstKind::Indexed);
7733 break;
7734
7737 RC = &AArch64::FPR128RegClass;
7739 Opc = AArch64::FMLSv2i64_indexed;
7740 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7741 FMAInstKind::Indexed);
7742 } else {
7743 Opc = AArch64::FMLSv2f64;
7744 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7745 FMAInstKind::Accumulator);
7746 }
7747 break;
7748
7751 RC = &AArch64::FPR128RegClass;
7753 Opc = AArch64::FMLSv4i32_indexed;
7754 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7755 FMAInstKind::Indexed);
7756 } else {
7757 Opc = AArch64::FMLSv4f32;
7758 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7759 FMAInstKind::Accumulator);
7760 }
7761 break;
7764 RC = &AArch64::FPR64RegClass;
7765 Register NewVR = MRI.createVirtualRegister(RC);
7766 MachineInstrBuilder MIB1 =
7767 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
7768 .add(Root.getOperand(2));
7769 InsInstrs.push_back(MIB1);
7770 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7772 Opc = AArch64::FMLAv2i32_indexed;
7773 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7774 FMAInstKind::Indexed, &NewVR);
7775 } else {
7776 Opc = AArch64::FMLAv2f32;
7777 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7778 FMAInstKind::Accumulator, &NewVR);
7779 }
7780 break;
7781 }
7784 RC = &AArch64::FPR128RegClass;
7785 Register NewVR = MRI.createVirtualRegister(RC);
7786 MachineInstrBuilder MIB1 =
7787 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
7788 .add(Root.getOperand(2));
7789 InsInstrs.push_back(MIB1);
7790 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7792 Opc = AArch64::FMLAv4i32_indexed;
7793 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7794 FMAInstKind::Indexed, &NewVR);
7795 } else {
7796 Opc = AArch64::FMLAv4f32;
7797 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7798 FMAInstKind::Accumulator, &NewVR);
7799 }
7800 break;
7801 }
7804 RC = &AArch64::FPR128RegClass;
7805 Register NewVR = MRI.createVirtualRegister(RC);
7806 MachineInstrBuilder MIB1 =
7807 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
7808 .add(Root.getOperand(2));
7809 InsInstrs.push_back(MIB1);
7810 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7812 Opc = AArch64::FMLAv2i64_indexed;
7813 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7814 FMAInstKind::Indexed, &NewVR);
7815 } else {
7816 Opc = AArch64::FMLAv2f64;
7817 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7818 FMAInstKind::Accumulator, &NewVR);
7819 }
7820 break;
7821 }
7824 unsigned IdxDupOp =
7826 : 2;
7827 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
7828 &AArch64::FPR128RegClass, MRI);
7829 break;
7830 }
7833 unsigned IdxDupOp =
7835 : 2;
7836 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
7837 &AArch64::FPR128RegClass, MRI);
7838 break;
7839 }
7842 unsigned IdxDupOp =
7844 : 2;
7845 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
7846 &AArch64::FPR128_loRegClass, MRI);
7847 break;
7848 }
7851 unsigned IdxDupOp =
7853 : 2;
7854 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
7855 &AArch64::FPR128RegClass, MRI);
7856 break;
7857 }
7860 unsigned IdxDupOp =
7862 : 2;
7863 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
7864 &AArch64::FPR128_loRegClass, MRI);
7865 break;
7866 }
7868 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
7869 break;
7870 }
7871
7872 } // end switch (Pattern)
7873 // Record MUL and ADD/SUB for deletion
7874 if (MUL)
7875 DelInstrs.push_back(MUL);
7876 DelInstrs.push_back(&Root);
7877
7878 // Set the flags on the inserted instructions to be the merged flags of the
7879 // instructions that we have combined.
7880 uint32_t Flags = Root.getFlags();
7881 if (MUL)
7882 Flags = Root.mergeFlagsWith(*MUL);
7883 for (auto *MI : InsInstrs)
7884 MI->setFlags(Flags);
7885}
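// Editor's note: a minimal, self-contained sketch (not part of the upstream
// file; the helper name is invented for illustration) of the "single
// instruction" check used in the MUL-with-immediate cases near the top of
// the switch above.  expandMOVImm() is consulted and the combine is only
// done when the immediate fits in one MOVZ, MOVN or ORR; this sketch covers
// just the MOVZ/MOVN half for a 32-bit value.
static inline bool isSingleMovzOrMovnImm32(uint32_t Imm) {
  // MOVZ materializes a value with at most one non-zero 16-bit chunk;
  // MOVN materializes a value whose complement has that property.
  unsigned NonZeroChunks = (Imm & 0xffffu ? 1 : 0) + (Imm >> 16 ? 1 : 0);
  uint32_t Inv = ~Imm;
  unsigned NonOnesChunks = (Inv & 0xffffu ? 1 : 0) + (Inv >> 16 ? 1 : 0);
  return NonZeroChunks <= 1 || NonOnesChunks <= 1;
}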
7886
7887/// Replace csinc-branch sequence by simple conditional branch
7888///
7889/// Examples:
7890/// 1. \code
7891/// csinc w9, wzr, wzr, <condition code>
7892/// tbnz w9, #0, 0x44
7893/// \endcode
7894/// to
7895/// \code
7896/// b.<inverted condition code>
7897/// \endcode
7898///
7899/// 2. \code
7900/// csinc w9, wzr, wzr, <condition code>
7901/// tbz w9, #0, 0x44
7902/// \endcode
7903/// to
7904/// \code
7905/// b.<condition code>
7906/// \endcode
7907///
7908/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
7909/// compare's constant operand is a power of 2.
7910///
7911/// Examples:
7912/// \code
7913/// and w8, w8, #0x400
7914/// cbnz w8, L1
7915/// \endcode
7916/// to
7917/// \code
7918/// tbnz w8, #10, L1
7919/// \endcode
7920///
7921/// \param MI Conditional Branch
7922/// \return True when the simple conditional branch is generated
7923///
7925 bool IsNegativeBranch = false;
7926 bool IsTestAndBranch = false;
7927 unsigned TargetBBInMI = 0;
7928 switch (MI.getOpcode()) {
7929 default:
7930 llvm_unreachable("Unknown branch instruction?");
7931 case AArch64::Bcc:
7932 return false;
7933 case AArch64::CBZW:
7934 case AArch64::CBZX:
7935 TargetBBInMI = 1;
7936 break;
7937 case AArch64::CBNZW:
7938 case AArch64::CBNZX:
7939 TargetBBInMI = 1;
7940 IsNegativeBranch = true;
7941 break;
7942 case AArch64::TBZW:
7943 case AArch64::TBZX:
7944 TargetBBInMI = 2;
7945 IsTestAndBranch = true;
7946 break;
7947 case AArch64::TBNZW:
7948 case AArch64::TBNZX:
7949 TargetBBInMI = 2;
7950 IsNegativeBranch = true;
7951 IsTestAndBranch = true;
7952 break;
7953 }
7954 // So we increment a zero register and test for bits other
7955 // than bit 0? Conservatively bail out in case the verifier
7956 // missed this case.
7957 if (IsTestAndBranch && MI.getOperand(1).getImm())
7958 return false;
7959
7960 // Find Definition.
7961 assert(MI.getParent() && "Incomplete machine instruction\n");
7962 MachineBasicBlock *MBB = MI.getParent();
7963 MachineFunction *MF = MBB->getParent();
7965 Register VReg = MI.getOperand(0).getReg();
7966 if (!VReg.isVirtual())
7967 return false;
7968
7969 MachineInstr *DefMI = MRI->getVRegDef(VReg);
7970
7971 // Look through COPY instructions to find definition.
7972 while (DefMI->isCopy()) {
7973 Register CopyVReg = DefMI->getOperand(1).getReg();
7974 if (!MRI->hasOneNonDBGUse(CopyVReg))
7975 return false;
7976 if (!MRI->hasOneDef(CopyVReg))
7977 return false;
7978 DefMI = MRI->getVRegDef(CopyVReg);
7979 }
7980
7981 switch (DefMI->getOpcode()) {
7982 default:
7983 return false;
7984 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
7985 case AArch64::ANDWri:
7986 case AArch64::ANDXri: {
7987 if (IsTestAndBranch)
7988 return false;
7989 if (DefMI->getParent() != MBB)
7990 return false;
7991 if (!MRI->hasOneNonDBGUse(VReg))
7992 return false;
7993
7994 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
7996 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
7997 if (!isPowerOf2_64(Mask))
7998 return false;
7999
8001 Register NewReg = MO.getReg();
8002 if (!NewReg.isVirtual())
8003 return false;
8004
8005 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8006
8007 MachineBasicBlock &RefToMBB = *MBB;
8008 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
8009 DebugLoc DL = MI.getDebugLoc();
8010 unsigned Imm = Log2_64(Mask);
8011 unsigned Opc = (Imm < 32)
8012 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8013 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8014 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
8015 .addReg(NewReg)
8016 .addImm(Imm)
8017 .addMBB(TBB);
8018 // Register lives on to the TBZ/TBNZ now.
8019 MO.setIsKill(false);
8020
8021 // For bit numbers smaller than 32, we must use the 32-bit
8022 // variant (W) in all cases, because the 64-bit variant cannot
8023 // encode them.
8024 // Therefore, if the input register is 64-bit, we need to take its
8025 // 32-bit sub-register (see the illustrative sketch after this function).
8026 if (!Is32Bit && Imm < 32)
8027 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
8028 MI.eraseFromParent();
8029 return true;
8030 }
8031 // Look for CSINC
8032 case AArch64::CSINCWr:
8033 case AArch64::CSINCXr: {
8034 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
8035 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
8036 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
8037 DefMI->getOperand(2).getReg() == AArch64::XZR))
8038 return false;
8039
8040 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
8041 true) != -1)
8042 return false;
8043
8045 // Convert only when the condition code is not modified between
8046 // the CSINC and the branch. The CC may be used by other
8047 // instructions in between.
8049 return false;
8050 MachineBasicBlock &RefToMBB = *MBB;
8051 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
8052 DebugLoc DL = MI.getDebugLoc();
8053 if (IsNegativeBranch)
8055 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
8056 MI.eraseFromParent();
8057 return true;
8058 }
8059 }
8060}
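// Editor's note: an illustrative, self-contained sketch (not upstream code;
// the helper name is invented) of the AND-mask-to-TB(N)Z mapping performed
// in the ANDWri/ANDXri case above.  A power-of-2 mask selects exactly one
// bit; its log2 is the bit number to test, and bit numbers below 32 must
// use the W-register form of TB(N)Z.
static inline unsigned maskToTestBit(uint64_t Mask) {
  assert(Mask != 0 && (Mask & (Mask - 1)) == 0 && "mask must be a power of 2");
  unsigned Bit = 0;
  while (Mask >>= 1)
    ++Bit;
  return Bit; // e.g. 0x400 -> 10, matching "tbnz w8, #10, L1" above.
}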
8061
8062std::pair<unsigned, unsigned>
8064 const unsigned Mask = AArch64II::MO_FRAGMENT;
8065 return std::make_pair(TF & Mask, TF & ~Mask);
8066}
8067
8070 using namespace AArch64II;
8071
8072 static const std::pair<unsigned, const char *> TargetFlags[] = {
8073 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8074 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8075 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8076 {MO_HI12, "aarch64-hi12"}};
8077 return ArrayRef(TargetFlags);
8078}
8079
8082 using namespace AArch64II;
8083
8084 static const std::pair<unsigned, const char *> TargetFlags[] = {
8085 {MO_COFFSTUB, "aarch64-coffstub"},
8086 {MO_GOT, "aarch64-got"},
8087 {MO_NC, "aarch64-nc"},
8088 {MO_S, "aarch64-s"},
8089 {MO_TLS, "aarch64-tls"},
8090 {MO_DLLIMPORT, "aarch64-dllimport"},
8091 {MO_PREL, "aarch64-prel"},
8092 {MO_TAGGED, "aarch64-tagged"},
8093 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8094 };
8095 return ArrayRef(TargetFlags);
8096}
8097
8100 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8101 {{MOSuppressPair, "aarch64-suppress-pair"},
8102 {MOStridedAccess, "aarch64-strided-access"}};
8103 return ArrayRef(TargetFlags);
8104}
8105
8106/// Constants defining how certain sequences should be outlined.
8107/// This encompasses how an outlined function should be called, and what kind of
8108/// frame should be emitted for that outlined function.
8109///
8110/// \p MachineOutlinerDefault implies that the function should be called with
8111/// a save and restore of LR to the stack.
8112///
8113/// That is,
8114///
8115/// I1 Save LR OUTLINED_FUNCTION:
8116/// I2 --> BL OUTLINED_FUNCTION I1
8117/// I3 Restore LR I2
8118/// I3
8119/// RET
8120///
8121/// * Call construction overhead: 3 (save + BL + restore)
8122/// * Frame construction overhead: 1 (ret)
8123/// * Requires stack fixups? Yes
8124///
8125/// \p MachineOutlinerTailCall implies that the function is being created from
8126/// a sequence of instructions ending in a return.
8127///
8128/// That is,
8129///
8130/// I1 OUTLINED_FUNCTION:
8131/// I2 --> B OUTLINED_FUNCTION I1
8132/// RET I2
8133/// RET
8134///
8135/// * Call construction overhead: 1 (B)
8136/// * Frame construction overhead: 0 (Return included in sequence)
8137/// * Requires stack fixups? No
8138///
8139/// \p MachineOutlinerNoLRSave implies that the function should be called using
8140/// a BL instruction, but doesn't require LR to be saved and restored. This
8141/// happens when LR is known to be dead.
8142///
8143/// That is,
8144///
8145/// I1 OUTLINED_FUNCTION:
8146/// I2 --> BL OUTLINED_FUNCTION I1
8147/// I3 I2
8148/// I3
8149/// RET
8150///
8151/// * Call construction overhead: 1 (BL)
8152/// * Frame construction overhead: 1 (RET)
8153/// * Requires stack fixups? No
8154///
8155/// \p MachineOutlinerThunk implies that the function is being created from
8156/// a sequence of instructions ending in a call. The outlined function is
8157/// called with a BL instruction, and the outlined function tail-calls the
8158/// original call destination.
8159///
8160/// That is,
8161///
8162/// I1 OUTLINED_FUNCTION:
8163/// I2 --> BL OUTLINED_FUNCTION I1
8164/// BL f I2
8165/// B f
8166/// * Call construction overhead: 1 (BL)
8167/// * Frame construction overhead: 0
8168/// * Requires stack fixups? No
8169///
8170/// \p MachineOutlinerRegSave implies that the function should be called with a
8171/// save and restore of LR to an available register. This allows us to avoid
8172/// stack fixups. Note that this outlining variant is compatible with the
8173/// NoLRSave case.
8174///
8175/// That is,
8176///
8177/// I1 Save LR OUTLINED_FUNCTION:
8178/// I2 --> BL OUTLINED_FUNCTION I1
8179/// I3 Restore LR I2
8180/// I3
8181/// RET
8182///
8183/// * Call construction overhead: 3 (save + BL + restore)
8184/// * Frame construction overhead: 1 (ret)
8185/// * Requires stack fixups? No
8187 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
8188 MachineOutlinerTailCall, /// Only emit a branch.
8189 MachineOutlinerNoLRSave, /// Emit a call and return.
8190 MachineOutlinerThunk, /// Emit a call and tail-call.
8191 MachineOutlinerRegSave /// Same as default, but save to a register.
8193
8197 UnsafeRegsDead = 0x8
8199
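// Editor's note: an illustrative sketch (not upstream code; the helper name
// is invented) of the per-variant call construction overhead, in bytes,
// described in the comment block above.  Every AArch64 instruction is 4
// bytes; these are the base values passed to setCallInfo() in
// getOutliningCandidateInfo below (the tail-call case may add extra bytes
// for authenticated-LR checks when PAuth is enabled).
static inline unsigned outlinerCallOverheadBytes(unsigned CallID) {
  switch (CallID) {
  case MachineOutlinerTailCall: return 4;  // B
  case MachineOutlinerNoLRSave: return 4;  // BL
  case MachineOutlinerThunk:    return 4;  // BL
  case MachineOutlinerRegSave:  return 12; // MOV + BL + MOV
  default:                      return 12; // Default: STR LR + BL + LDR LR
  }
}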
8201AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
8202 MachineFunction *MF = C.getMF();
8204 const AArch64RegisterInfo *ARI =
8205 static_cast<const AArch64RegisterInfo *>(&TRI);
8206 // Check if there is an available register across the sequence that we can
8207 // use.
8208 for (unsigned Reg : AArch64::GPR64RegClass) {
8209 if (!ARI->isReservedReg(*MF, Reg) &&
8210 Reg != AArch64::LR && // LR is not reserved, but don't use it.
8211 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
8212 Reg != AArch64::X17 && // Ditto for X17.
8213 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
8214 C.isAvailableInsideSeq(Reg, TRI))
8215 return Reg;
8216 }
8217 return Register();
8218}
8219
8220static bool
8222 const outliner::Candidate &b) {
8223 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8224 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8225
8226 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
8227 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
8228}
8229
8230static bool
8232 const outliner::Candidate &b) {
8233 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8234 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8235
8236 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
8237}
8238
8240 const outliner::Candidate &b) {
8241 const AArch64Subtarget &SubtargetA =
8243 const AArch64Subtarget &SubtargetB =
8244 b.getMF()->getSubtarget<AArch64Subtarget>();
8245 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
8246}
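// Editor's note: an illustrative, generic sketch (not upstream code; the
// helper name is invented, and <algorithm> is assumed to be reachable, as
// std::adjacent_find is already used below) of how the three consensus
// helpers above are combined in getOutliningCandidateInfo: outlining is only
// attempted when no neighbouring pair of candidates disagrees on signing
// scope, signing key, or v8.3a support.
template <typename RangeT, typename PredT>
static inline bool allNeighboursAgree(const RangeT &Candidates, PredT SameAs) {
  return std::adjacent_find(std::begin(Candidates), std::end(Candidates),
                            [&](const auto &A, const auto &B) {
                              return !SameAs(A, B); // first disagreement
                            }) == std::end(Candidates);
}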
8247
8248std::optional<outliner::OutlinedFunction>
8250 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
8251 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
8252
8253 unsigned SequenceSize = 0;
8254 for (auto &MI : FirstCand)
8255 SequenceSize += getInstSizeInBytes(MI);
8256
8257 unsigned NumBytesToCreateFrame = 0;
8258
8259 // We only allow outlining for functions having exactly matching return
8260 // address signing attributes, i.e., all share the same value for the
8261 // attribute "sign-return-address" and all share the same type of key they
8262 // are signed with.
8263 // Additionally we require all functions to simultaneously either support
8264 // v8.3a features or not. Otherwise an outlined function could get signed
8265 // using dedicated v8.3 instructions and a call from a function that doesn't
8266 // support v8.3 instructions would therefore be invalid.
8267 if (std::adjacent_find(
8268 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
8269 [](const outliner::Candidate &a, const outliner::Candidate &b) {
8270 // Return true if a and b are non-equal w.r.t. return address
8271 // signing or support of v8.3a features
8272 if (outliningCandidatesSigningScopeConsensus(a, b) &&
8273 outliningCandidatesSigningKeyConsensus(a, b) &&
8274 outliningCandidatesV8_3OpsConsensus(a, b)) {
8275 return false;
8276 }
8277 return true;
8278 }) != RepeatedSequenceLocs.end()) {
8279 return std::nullopt;
8280 }
8281
8282 // Since at this point all candidates agree on their return address signing,
8283 // picking just one is fine. If the candidate functions potentially sign their
8284 // return addresses, the outlined function should do the same. Note that in
8285 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
8286 // not certainly true that the outlined function will have to sign its return
8287 // address but this decision is made later, when the decision to outline
8288 // has already been made.
8289 // The same holds for the number of additional instructions we need: On
8290 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
8291 // necessary. However, at this point we don't know if the outlined function
8292 // will have a RET instruction so we assume the worst.
8293 const TargetRegisterInfo &TRI = getRegisterInfo();
8294 // Performing a tail call may require extra checks when PAuth is enabled.
8295 // If PAuth is disabled, set it to zero for uniformity.
8296 unsigned NumBytesToCheckLRInTCEpilogue = 0;
8297 if (FirstCand.getMF()
8298 ->getInfo<AArch64FunctionInfo>()
8299 ->shouldSignReturnAddress(true)) {
8300 // One PAC and one AUT instructions
8301 NumBytesToCreateFrame += 8;
8302
8303 // PAuth is enabled - set extra tail call cost, if any.
8304 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod();
8305 NumBytesToCheckLRInTCEpilogue =
8307 // Checking the authenticated LR value may significantly impact
8308 // SequenceSize, so account for it for more precise results.
8309 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
8310 SequenceSize += NumBytesToCheckLRInTCEpilogue;
8311
8312 // We have to check if sp modifying instructions would get outlined.
8313 // If so, we only allow outlining if sp is unchanged overall, so matching
8314 // sub and add instructions are okay to outline; all other sp modifications
8315 // are not.
8316 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
8317 int SPValue = 0;
8318 for (auto &MI : C) {
8319 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
8320 switch (MI.getOpcode()) {
8321 case AArch64::ADDXri:
8322 case AArch64::ADDWri:
8323 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8324 assert(MI.getOperand(2).isImm() &&
8325 "Expected operand to be immediate");
8326 assert(MI.getOperand(1).isReg() &&
8327 "Expected operand to be a register");
8328 // Check if the add just increments sp. If so, we search for
8329 // matching sub instructions that decrement sp. If not, the
8330 // modification is illegal
8331 if (MI.getOperand(1).getReg() == AArch64::SP)
8332 SPValue += MI.getOperand(2).getImm();
8333 else
8334 return true;
8335 break;
8336 case AArch64::SUBXri:
8337 case AArch64::SUBWri:
8338 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8339 assert(MI.getOperand(2).isImm() &&
8340 "Expected operand to be immediate");
8341 assert(MI.getOperand(1).isReg() &&
8342 "Expected operand to be a register");
8343 // Check if the sub just decrements sp. If so, we search for
8344 // matching add instructions that increment sp. If not, the
8345 // modification is illegal
8346 if (MI.getOperand(1).getReg() == AArch64::SP)
8347 SPValue -= MI.getOperand(2).getImm();
8348 else
8349 return true;
8350 break;
8351 default:
8352 return true;
8353 }
8354 }
8355 }
8356 if (SPValue)
8357 return true;
8358 return false;
8359 };
8360 // Remove candidates with illegal stack modifying instructions
8361 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
8362
8363 // If the sequence doesn't have enough candidates left, then we're done.
8364 if (RepeatedSequenceLocs.size() < 2)
8365 return std::nullopt;
8366 }
8367
8368 // Properties about candidate MBBs that hold for all of them.
8369 unsigned FlagsSetInAll = 0xF;
8370
8371 // Compute liveness information for each candidate, and set FlagsSetInAll.
8372 for (outliner::Candidate &C : RepeatedSequenceLocs)
8373 FlagsSetInAll &= C.Flags;
8374
8375 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
8376
8377 // Helper lambda which sets call information for every candidate.
8378 auto SetCandidateCallInfo =
8379 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
8380 for (outliner::Candidate &C : RepeatedSequenceLocs)
8381 C.setCallInfo(CallID, NumBytesForCall);
8382 };
8383
8384 unsigned FrameID = MachineOutlinerDefault;
8385 NumBytesToCreateFrame += 4;
8386
8387 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
8388 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
8389 });
8390
8391 // We check to see if CFI instructions are present, and if they are
8392 // we find the number of CFI instructions in the first candidate.
8393 unsigned CFICount = 0;
8394 for (auto &I : RepeatedSequenceLocs[0]) {
8395 if (I.isCFIInstruction())
8396 CFICount++;
8397 }
8398
8399 // We compare the number of CFI instructions found to the number of CFI
8400 // instructions in the parent function for each candidate. We must check this
8401 // since if we outline one of the CFI instructions in a function, we have to
8402 // outline them all for correctness. If we do not, the address offsets will be
8403 // incorrect between the two sections of the program.
8404 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8405 std::vector<MCCFIInstruction> CFIInstructions =
8406 C.getMF()->getFrameInstructions();
8407
8408 if (CFICount > 0 && CFICount != CFIInstructions.size())
8409 return std::nullopt;
8410 }
8411
8412 // Returns true if an instructions is safe to fix up, false otherwise.
8413 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
8414 if (MI.isCall())
8415 return true;
8416
8417 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
8418 !MI.readsRegister(AArch64::SP, &TRI))
8419 return true;
8420
8421 // Any modification of SP will break our code to save/restore LR.
8422 // FIXME: We could handle some instructions which add a constant
8423 // offset to SP, with a bit more work.
8424 if (MI.modifiesRegister(AArch64::SP, &TRI))
8425 return false;
8426
8427 // At this point, we have a stack instruction that we might need to
8428 // fix up. We'll handle it if it's a load or store.
8429 if (MI.mayLoadOrStore()) {
8430 const MachineOperand *Base; // Filled with the base operand of MI.
8431 int64_t Offset; // Filled with the offset of MI.
8432 bool OffsetIsScalable;
8433
8434 // Does it allow us to offset the base operand and is the base the
8435 // register SP?
8436 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
8437 !Base->isReg() || Base->getReg() != AArch64::SP)
8438 return false;
8439
8440 // Fix-up code below assumes bytes.
8441 if (OffsetIsScalable)
8442 return false;
8443
8444 // Find the minimum/maximum offset for this instruction and check
8445 // if fixing it up would be in range.
8446 int64_t MinOffset,
8447 MaxOffset; // Unscaled offsets for the instruction.
8448 // The scale to multiply the offsets by.
8449 TypeSize Scale(0U, false), DummyWidth(0U, false);
8450 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
8451
8452 Offset += 16; // Update the offset to what it would be if we outlined.
8453 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
8454 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
8455 return false;
8456
8457 // It's in range, so we can outline it.
8458 return true;
8459 }
8460
8461 // FIXME: Add handling for instructions like "add x0, sp, #8".
8462
8463 // We can't fix it up, so don't outline it.
8464 return false;
8465 };
8466
8467 // True if it's possible to fix up each stack instruction in this sequence.
8468 // Important for frames/call variants that modify the stack.
8469 bool AllStackInstrsSafe = llvm::all_of(FirstCand, IsSafeToFixup);
8470
8471 // If the last instruction in any candidate is a terminator, then we should
8472 // tail call all of the candidates.
8473 if (RepeatedSequenceLocs[0].back().isTerminator()) {
8474 FrameID = MachineOutlinerTailCall;
8475 NumBytesToCreateFrame = 0;
8476 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
8477 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
8478 }
8479
8480 else if (LastInstrOpcode == AArch64::BL ||
8481 ((LastInstrOpcode == AArch64::BLR ||
8482 LastInstrOpcode == AArch64::BLRNoIP) &&
8483 !HasBTI)) {
8484 // FIXME: Do we need to check if the code after this uses the value of LR?
8485 FrameID = MachineOutlinerThunk;
8486 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
8487 SetCandidateCallInfo(MachineOutlinerThunk, 4);
8488 }
8489
8490 else {
8491 // We need to decide how to emit calls + frames. We can always emit the same
8492 // frame if we don't need to save to the stack. If we have to save to the
8493 // stack, then we need a different frame.
8494 unsigned NumBytesNoStackCalls = 0;
8495 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
8496
8497 // Check if we have to save LR.
8498 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8499 bool LRAvailable =
8500 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
8501 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
8502 : true;
8503 // If we have a noreturn caller, then we're going to be conservative and
8504 // say that we have to save LR. If we don't have a ret at the end of the
8505 // block, then we can't reason about liveness accurately.
8506 //
8507 // FIXME: We can probably do better than always disabling this in
8508 // noreturn functions by fixing up the liveness info.
8509 bool IsNoReturn =
8510 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
8511
8512 // Is LR available? If so, we don't need a save.
8513 if (LRAvailable && !IsNoReturn) {
8514 NumBytesNoStackCalls += 4;
8515 C.setCallInfo(MachineOutlinerNoLRSave, 4);
8516 CandidatesWithoutStackFixups.push_back(C);
8517 }
8518
8519 // Is an unused register available? If so, we won't modify the stack, so
8520 // we can outline with the same frame type as those that don't save LR.
8521 else if (findRegisterToSaveLRTo(C)) {
8522 NumBytesNoStackCalls += 12;
8523 C.setCallInfo(MachineOutlinerRegSave, 12);
8524 CandidatesWithoutStackFixups.push_back(C);
8525 }
8526
8527 // Is SP used in the sequence at all? If not, we don't have to modify
8528 // the stack, so we are guaranteed to get the same frame.
8529 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
8530 NumBytesNoStackCalls += 12;
8531 C.setCallInfo(MachineOutlinerDefault, 12);
8532 CandidatesWithoutStackFixups.push_back(C);
8533 }
8534
8535 // If we outline this, we need to modify the stack. Pretend we don't
8536 // outline this by saving all of its bytes.
8537 else {
8538 NumBytesNoStackCalls += SequenceSize;
8539 }
8540 }
8541
8542 // If there are no places where we have to save LR, then note that we
8543 // don't have to update the stack. Otherwise, give every candidate the
8544 // default call type, as long as it's safe to do so.
8545 if (!AllStackInstrsSafe ||
8546 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
8547 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
8548 FrameID = MachineOutlinerNoLRSave;
8549 } else {
8550 SetCandidateCallInfo(MachineOutlinerDefault, 12);
8551
8552 // Bugzilla ID: 46767
8553 // TODO: Check if fixing up the stack more than once is safe so we can
8554 // outline these.
8555 //
8556 // An outline resulting in a caller that requires stack fixups at the
8557 // callsite to a callee that also requires stack fixups can happen when
8558 // there are no available registers at the candidate callsite for a
8559 // candidate that itself also has calls.
8560 //
8561 // In other words, if function_containing_sequence in the following pseudo
8562 // assembly requires that we save LR at the point of the call, but there
8563 // are no available registers, we save using SP; as a result, the SP
8564 // offsets require stack fixups by multiples of 16.
8565 //
8566 // function_containing_sequence:
8567 // ...
8568 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8569 // call OUTLINED_FUNCTION_N
8570 // restore LR from SP
8571 // ...
8572 //
8573 // OUTLINED_FUNCTION_N:
8574 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8575 // ...
8576 // bl foo
8577 // restore LR from SP
8578 // ret
8579 //
8580 // Because the code to handle more than one stack fixup does not
8581 // currently have the proper checks for legality, these cases will assert
8582 // in the AArch64 MachineOutliner. This is because the code to do this
8583 // needs more hardening, testing, better checks that generated code is
8584 // legal, etc and because it is only verified to handle a single pass of
8585 // stack fixup.
8586 //
8587 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
8588 // these cases until they are known to be handled. Bugzilla 46767 is
8589 // referenced in comments at the assert site.
8590 //
8591 // To avoid asserting (or generating non-legal code on noassert builds)
8592 // we remove all candidates which would need more than one stack fixup by
8593 // pruning the cases where the candidate has calls while also having no
8594 // available LR and having no available general purpose registers to copy
8595 // LR to (ie one extra stack save/restore).
8596 //
8597 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8598 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
8599 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
8600 return (llvm::any_of(C, IsCall)) &&
8601 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
8602 !findRegisterToSaveLRTo(C));
8603 });
8604 }
8605 }
8606
8607 // If we dropped all of the candidates, bail out here.
8608 if (RepeatedSequenceLocs.size() < 2) {
8609 RepeatedSequenceLocs.clear();
8610 return std::nullopt;
8611 }
8612 }
8613
8614 // Does every candidate's MBB contain a call? If so, then we might have a call
8615 // in the range.
8616 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8617 // Check if the range contains a call. These require a save + restore of the
8618 // link register.
8619 bool ModStackToSaveLR = false;
8620 if (std::any_of(FirstCand.begin(), std::prev(FirstCand.end()),
8621 [](const MachineInstr &MI) { return MI.isCall(); }))
8622 ModStackToSaveLR = true;
8623
8624 // Handle the last instruction separately. If this is a tail call, then the
8625 // last instruction is a call. We don't want to save + restore in this case.
8626 // However, it could be possible that the last instruction is a call without
8627 // it being valid to tail call this sequence. We should consider this as
8628 // well.
8629 else if (FrameID != MachineOutlinerThunk &&
8630 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
8631 ModStackToSaveLR = true;
8632
8633 if (ModStackToSaveLR) {
8634 // We can't fix up the stack. Bail out.
8635 if (!AllStackInstrsSafe) {
8636 RepeatedSequenceLocs.clear();
8637 return std::nullopt;
8638 }
8639
8640 // Save + restore LR.
8641 NumBytesToCreateFrame += 8;
8642 }
8643 }
8644
8645 // If we have CFI instructions, we can only outline if the outlined section
8646 // can be a tail call
8647 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
8648 return std::nullopt;
8649
8650 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
8651 NumBytesToCreateFrame, FrameID);
8652}
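// Editor's note: an illustrative sketch (not upstream code; the helper name
// is invented) of the frame-selection decision made above.  Each candidate
// contributes 4 bytes (LR free), 12 bytes (register or SP save of LR), or
// its whole sequence size (stack fixups unavoidable); if that total is no
// larger than giving every candidate the 12-byte default call, or the stack
// instructions cannot be fixed up at all, the no-stack-fixup variants win.
static inline bool preferNoStackFixupCalls(unsigned NumBytesNoStackCalls,
                                           size_t NumCandidates,
                                           bool AllStackInstrsSafe) {
  return !AllStackInstrsSafe || NumBytesNoStackCalls <= NumCandidates * 12;
}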
8653
8655 Function &F, std::vector<outliner::Candidate> &Candidates) const {
8656 // If several candidates reach this point, they must agree on their return
8657 // address signing. It is therefore enough to just consider the signing
8658 // behaviour of one of them.
8659 const auto &CFn = Candidates.front().getMF()->getFunction();
8660
8661 // Since all candidates belong to the same module, just copy the
8662 // function-level attributes of an arbitrary function.
8663 if (CFn.hasFnAttribute("sign-return-address"))
8664 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
8665 if (CFn.hasFnAttribute("sign-return-address-key"))
8666 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
8667
8668 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
8669}
8670
8672 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
8673 const Function &F = MF.getFunction();
8674
8675 // Can F be deduplicated by the linker? If it can, don't outline from it.
8676 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
8677 return false;
8678
8679 // Don't outline from functions with section markings; the program could
8680 // expect that all the code is in the named section.
8681 // FIXME: Allow outlining from multiple functions with the same section
8682 // marking.
8683 if (F.hasSection())
8684 return false;
8685
8686 // Outlining from functions with redzones is unsafe since the outliner may
8687 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
8688 // outline from it.
8690 if (!AFI || AFI->hasRedZone().value_or(true))
8691 return false;
8692
8693 // FIXME: Teach the outliner to generate/handle Windows unwind info.
8695 return false;
8696
8697 // It's safe to outline from MF.
8698 return true;
8699}
8700
8703 unsigned &Flags) const {
8705 "Must track liveness!");
8707 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
8708 Ranges;
8709 // According to the AArch64 Procedure Call Standard, the following are
8710 // undefined on entry/exit from a function call:
8711 //
8712 // * Registers x16, x17, (and thus w16, w17)
8713 // * Condition codes (and thus the NZCV register)
8714 //
8715 // If any of these registers are used inside or live across an outlined
8716 // function, then they may be modified later, either by the compiler or
8717 // some other tool (like the linker).
8718 //
8719 // To avoid outlining in these situations, partition each block into ranges
8720 // where these registers are dead. We will only outline from those ranges.
8722 auto AreAllUnsafeRegsDead = [&LRU]() {
8723 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
8724 LRU.available(AArch64::NZCV);
8725 };
8726
8727 // We need to know if LR is live across an outlining boundary later on in
8728 // order to decide how we'll create the outlined call, frame, etc.
8729 //
8730 // It's pretty expensive to check this for *every candidate* within a block.
8731 // That's some potentially n^2 behaviour, since in the worst case, we'd need
8732 // to compute liveness from the end of the block for O(n) candidates within
8733 // the block.
8734 //
8735 // So, to improve the average case, let's keep track of liveness from the end
8736 // of the block to the beginning of *every outlinable range*. If we know that
8737 // LR is available in every range we could outline from, then we know that
8738 // we don't need to check liveness for any candidate within that range.
8739 bool LRAvailableEverywhere = true;
8740 // Compute liveness bottom-up.
8741 LRU.addLiveOuts(MBB);
8742 // Update flags that require info about the entire MBB.
8743 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
8744 if (MI.isCall() && !MI.isTerminator())
8745 Flags |= MachineOutlinerMBBFlags::HasCalls;
8746 };
8747 // Range: [RangeBegin, RangeEnd)
8748 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
8749 unsigned RangeLen;
8750 auto CreateNewRangeStartingAt =
8751 [&RangeBegin, &RangeEnd,
8752 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
8753 RangeBegin = NewBegin;
8754 RangeEnd = std::next(RangeBegin);
8755 RangeLen = 0;
8756 };
8757 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
8758 // At least one unsafe register is not dead. We do not want to outline at
8759 // this point. If it is long enough to outline from, save the range
8760 // [RangeBegin, RangeEnd).
8761 if (RangeLen > 1)
8762 Ranges.push_back(std::make_pair(RangeBegin, RangeEnd));
8763 };
8764 // Find the first point where all unsafe registers are dead.
8765 // FIND: <safe instr> <-- end of first potential range
8766 // SKIP: <unsafe def>
8767 // SKIP: ... everything between ...
8768 // SKIP: <unsafe use>
8769 auto FirstPossibleEndPt = MBB.instr_rbegin();
8770 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
8771 LRU.stepBackward(*FirstPossibleEndPt);
8772 // Update flags that impact how we outline across the entire block,
8773 // regardless of safety.
8774 UpdateWholeMBBFlags(*FirstPossibleEndPt);
8775 if (AreAllUnsafeRegsDead())
8776 break;
8777 }
8778 // If we exhausted the entire block, we have no safe ranges to outline.
8779 if (FirstPossibleEndPt == MBB.instr_rend())
8780 return Ranges;
8781 // Current range.
8782 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
8783 // FirstPossibleEndPt points to the first place where all unsafe registers
8784 // are dead (if there is any such point). Begin partitioning the MBB into
8785 // ranges.
8786 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
8787 LRU.stepBackward(MI);
8788 UpdateWholeMBBFlags(MI);
8789 if (!AreAllUnsafeRegsDead()) {
8790 SaveRangeIfNonEmpty();
8791 CreateNewRangeStartingAt(MI.getIterator());
8792 continue;
8793 }
8794 LRAvailableEverywhere &= LRU.available(AArch64::LR);
8795 RangeBegin = MI.getIterator();
8796 ++RangeLen;
8797 }
8798 // Above loop misses the last (or only) range. If we are still safe, then
8799 // let's save the range.
8800 if (AreAllUnsafeRegsDead())
8801 SaveRangeIfNonEmpty();
8802 if (Ranges.empty())
8803 return Ranges;
8804 // We found the ranges bottom-up. Mapping expects the top-down. Reverse
8805 // the order.
8806 std::reverse(Ranges.begin(), Ranges.end());
8807 // If there is at least one outlinable range where LR is unavailable
8808 // somewhere, remember that.
8809 if (!LRAvailableEverywhere)
8810 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
8811 return Ranges;
8812}
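// Editor's note: an illustrative, self-contained sketch (not upstream code;
// the helper name is invented, and std::vector is assumed to be available,
// as it is used elsewhere in this file) of the partitioning idea behind
// getOutlinableRanges above, applied to a plain array.  Safe[i] says whether
// all unsafe registers (X16, X17, NZCV) are dead at instruction i; maximal
// runs of 'true' longer than one instruction become outlinable [begin, end)
// ranges, reported in top-down order.
static inline std::vector<std::pair<unsigned, unsigned>>
partitionSafeRanges(const std::vector<bool> &Safe) {
  std::vector<std::pair<unsigned, unsigned>> Ranges;
  unsigned N = static_cast<unsigned>(Safe.size());
  for (unsigned I = 0; I < N;) {
    if (!Safe[I]) {
      ++I;
      continue;
    }
    unsigned Begin = I;
    while (I < N && Safe[I])
      ++I;
    if (I - Begin > 1) // A range of a single instruction is not worth it.
      Ranges.emplace_back(Begin, I);
  }
  return Ranges;
}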
8813
8816 unsigned Flags) const {
8817 MachineInstr &MI = *MIT;
8818 MachineBasicBlock *MBB = MI.getParent();
8819 MachineFunction *MF = MBB->getParent();
8821
8822 // Don't outline anything used for return address signing. The outlined
8823 // function will get signed later if needed
8824 switch (MI.getOpcode()) {
8825 case AArch64::PACM:
8826 case AArch64::PACIASP:
8827 case AArch64::PACIBSP:
8828 case AArch64::PACIASPPC:
8829 case AArch64::PACIBSPPC:
8830 case AArch64::AUTIASP:
8831 case AArch64::AUTIBSP:
8832 case AArch64::AUTIASPPCi:
8833 case AArch64::AUTIASPPCr:
8834 case AArch64::AUTIBSPPCi:
8835 case AArch64::AUTIBSPPCr:
8836 case AArch64::RETAA:
8837 case AArch64::RETAB:
8838 case AArch64::RETAASPPCi:
8839 case AArch64::RETAASPPCr:
8840 case AArch64::RETABSPPCi:
8841 case AArch64::RETABSPPCr:
8842 case AArch64::EMITBKEY:
8843 case AArch64::PAUTH_PROLOGUE:
8844 case AArch64::PAUTH_EPILOGUE:
8846 }
8847
8848 // Don't outline LOHs.
8849 if (FuncInfo->getLOHRelated().count(&MI))
8851
8852 // We can only outline these if we will tail call the outlined function, or
8853 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
8854 // in a tail call.
8855 //
8856 // FIXME: If the proper fixups for the offset are implemented, this should be
8857 // possible.
8858 if (MI.isCFIInstruction())
8860
8861 // Is this a terminator for a basic block?
8862 if (MI.isTerminator())
8863 // TargetInstrInfo::getOutliningType has already filtered out anything
8864 // that would break this, so we can allow it here.
8866
8867 // Make sure none of the operands are un-outlinable.
8868 for (const MachineOperand &MOP : MI.operands()) {
8869 // A check preventing CFI indices was here before, but only CFI
8870 // instructions should have those.
8871 assert(!MOP.isCFIIndex());
8872
8873 // If it uses LR or W30 explicitly, then don't touch it.
8874 if (MOP.isReg() && !MOP.isImplicit() &&
8875 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
8877 }
8878
8879 // Special cases for instructions that can always be outlined, but will fail
8880 // the later tests, e.g. ADRPs, which are PC-relative and use LR, but can always
8881 // be outlined because they don't require a *specific* value to be in LR.
8882 if (MI.getOpcode() == AArch64::ADRP)
8884
8885 // If MI is a call we might be able to outline it. We don't want to outline
8886 // any calls that rely on the position of items on the stack. When we outline
8887 // something containing a call, we have to emit a save and restore of LR in
8888 // the outlined function. Currently, this always happens by saving LR to the
8889 // stack. Thus, if we outline, say, half the parameters for a function call
8890 // plus the call, then we'll break the callee's expectations for the layout
8891 // of the stack.
8892 //
8893 // FIXME: Allow calls to functions which construct a stack frame, as long
8894 // as they don't access arguments on the stack.
8895 // FIXME: Figure out some way to analyze functions defined in other modules.
8896 // We should be able to compute the memory usage based on the IR calling
8897 // convention, even if we can't see the definition.
8898 if (MI.isCall()) {
8899 // Get the function associated with the call. Look at each operand and find
8900 // the one that represents the callee and get its name.
8901 const Function *Callee = nullptr;
8902 for (const MachineOperand &MOP : MI.operands()) {
8903 if (MOP.isGlobal()) {
8904 Callee = dyn_cast<Function>(MOP.getGlobal());
8905 break;
8906 }
8907 }
8908
8909 // Never outline calls to mcount. There isn't any rule that would require
8910 // this, but the Linux kernel's "ftrace" feature depends on it.
8911 if (Callee && Callee->getName() == "\01_mcount")
8913
8914 // If we don't know anything about the callee, assume it depends on the
8915 // stack layout of the caller. In that case, it's only legal to outline
8916 // as a tail-call. Explicitly list the call instructions we know about so we
8917 // don't get unexpected results with call pseudo-instructions.
8918 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
8919 if (MI.getOpcode() == AArch64::BLR ||
8920 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
8921 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
8922
8923 if (!Callee)
8924 return UnknownCallOutlineType;
8925
8926 // We have a function we have information about. Check it if it's something
8927 // can safely outline.
8928 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
8929
8930 // We don't know what's going on with the callee at all. Don't touch it.
8931 if (!CalleeMF)
8932 return UnknownCallOutlineType;
8933
8934 // Check if we know anything about the callee saves on the function. If we
8935 // don't, then don't touch it, since that implies that we haven't
8936 // computed anything about its stack frame yet.
8937 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
8938 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
8939 MFI.getNumObjects() > 0)
8940 return UnknownCallOutlineType;
8941
8942 // At this point, we can say that CalleeMF ought to not pass anything on the
8943 // stack. Therefore, we can outline it.
8944 return outliner::InstrType::Legal;
8945 }
8946
8947 // Don't touch the link register or W30.
8948 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
8949 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
8950 return outliner::InstrType::Illegal;
8951
8952 // Don't outline BTI instructions, because that will prevent the outlining
8953 // site from being indirectly callable.
8954 if (hasBTISemantics(MI))
8955 return outliner::InstrType::Illegal;
8956
8957 return outliner::InstrType::Legal;
8958}
8959
8960void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
8961 for (MachineInstr &MI : MBB) {
8962 const MachineOperand *Base;
8963 TypeSize Width(0, false);
8964 int64_t Offset;
8965 bool OffsetIsScalable;
8966
8967 // Is this a load or store with an immediate offset with SP as the base?
8968 if (!MI.mayLoadOrStore() ||
8969 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
8970 &RI) ||
8971 (Base->isReg() && Base->getReg() != AArch64::SP))
8972 continue;
8973
8974 // It is, so we have to fix it up.
8975 TypeSize Scale(0U, false);
8976 int64_t Dummy1, Dummy2;
8977
8978 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
8979 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
8980 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
8981 assert(Scale != 0 && "Unexpected opcode!");
8982 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
8983
8984 // We've pushed the return address to the stack, so add 16 to the offset.
8985 // This is safe, since we already checked if it would overflow when we
8986 // checked if this instruction was legal to outline.
8987 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
8988 StackOffsetOperand.setImm(NewImm);
8989 }
8990}
8991
8992 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
8993 const AArch64InstrInfo *TII,
8994 bool ShouldSignReturnAddr) {
8995 if (!ShouldSignReturnAddr)
8996 return;
8997
8998 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
8999 .setMIFlag(MachineInstr::FrameSetup);
9000 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
9001 TII->get(AArch64::PAUTH_EPILOGUE))
9002 .setMIFlag(MachineInstr::FrameDestroy);
9003}
9004
9005 void AArch64InstrInfo::buildOutlinedFrame(
9006 MachineBasicBlock &MBB, MachineFunction &MF,
9007 const outliner::OutlinedFunction &OF) const {
9008
9009 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
9010
9011 if (OF.FrameConstructionID == MachineOutlinerTailCall)
9012 FI->setOutliningStyle("Tail Call");
9013 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
9014 // For thunk outlining, rewrite the last instruction from a call to a
9015 // tail-call.
9016 MachineInstr *Call = &*--MBB.instr_end();
9017 unsigned TailOpcode;
9018 if (Call->getOpcode() == AArch64::BL) {
9019 TailOpcode = AArch64::TCRETURNdi;
9020 } else {
9021 assert(Call->getOpcode() == AArch64::BLR ||
9022 Call->getOpcode() == AArch64::BLRNoIP);
9023 TailOpcode = AArch64::TCRETURNriALL;
9024 }
9025 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
9026 .add(Call->getOperand(0))
9027 .addImm(0);
9028 MBB.insert(MBB.end(), TC);
9029 Call->eraseFromParent();
9030
9031 FI->setOutliningStyle("Thunk");
9032 }
9033
9034 bool IsLeafFunction = true;
9035
9036 // Is there a call in the outlined range?
9037 auto IsNonTailCall = [](const MachineInstr &MI) {
9038 return MI.isCall() && !MI.isReturn();
9039 };
9040
9041 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
9042 // Fix up the instructions in the range, since we're going to modify the
9043 // stack.
9044
9045 // Bugzilla ID: 46767
9046 // TODO: Check if fixing up twice is safe so we can outline these.
9047 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
9048 "Can only fix up stack references once");
9049 fixupPostOutline(MBB);
9050
9051 IsLeafFunction = false;
9052
9053 // LR has to be a live in so that we can save it.
9054 if (!MBB.isLiveIn(AArch64::LR))
9055 MBB.addLiveIn(AArch64::LR);
9056
9056
9057 MachineBasicBlock::iterator It = MBB.begin();
9058 MachineBasicBlock::iterator Et = MBB.end();
9059
9060 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9061 OF.FrameConstructionID == MachineOutlinerThunk)
9062 Et = std::prev(MBB.end());
9063
9064 // Insert a save before the outlined region
9065 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9066 .addReg(AArch64::SP, RegState::Define)
9067 .addReg(AArch64::LR)
9068 .addReg(AArch64::SP)
9069 .addImm(-16);
9070 It = MBB.insert(It, STRXpre);
9071
9072 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
9073 const TargetSubtargetInfo &STI = MF.getSubtarget();
9074 const MCRegisterInfo *MRI = STI.getRegisterInfo();
9075 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
9076
9077 // Add a CFI saying the stack was moved 16 B down.
9078 int64_t StackPosEntry =
9079 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
9080 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9081 .addCFIIndex(StackPosEntry)
9082 .setMIFlags(MachineInstr::FrameSetup);
9083
9084 // Add a CFI saying that the LR that we want to find is now 16 B higher
9085 // than before.
9086 int64_t LRPosEntry = MF.addFrameInst(
9087 MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
9088 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9089 .addCFIIndex(LRPosEntry)
9090 .setMIFlags(MachineInstr::FrameSetup);
9091 }
9092
9093 // Insert a restore before the terminator for the function.
9094 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9095 .addReg(AArch64::SP, RegState::Define)
9096 .addReg(AArch64::LR, RegState::Define)
9097 .addReg(AArch64::SP)
9098 .addImm(16);
9099 Et = MBB.insert(Et, LDRXpost);
9100 }
9101
9102 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
9103
9104 // If this is a tail call outlined function, then there's already a return.
9105 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9106 OF.FrameConstructionID == MachineOutlinerThunk) {
9107 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9108 return;
9109 }
9110
9111 // It's not a tail call, so we have to insert the return ourselves.
9112
9113 // LR has to be a live in so that we can return to it.
9114 if (!MBB.isLiveIn(AArch64::LR))
9115 MBB.addLiveIn(AArch64::LR);
9116
9117 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
9118 .addReg(AArch64::LR);
9119 MBB.insert(MBB.end(), ret);
9120
9121 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9122
9123 FI->setOutliningStyle("Function");
9124
9125 // Did we have to modify the stack by saving the link register?
9126 if (OF.FrameConstructionID != MachineOutlinerDefault)
9127 return;
9128
9129 // We modified the stack.
9130 // Walk over the basic block and fix up all the stack accesses.
9131 fixupPostOutline(MBB);
9132}
9133
9134 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
9135 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
9136 MachineFunction &MF, outliner::Candidate &C) const {
9137
9138 // Are we tail calling?
9139 if (C.CallConstructionID == MachineOutlinerTailCall) {
9140 // If yes, then we can just branch to the label.
9141 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
9142 .addGlobalAddress(M.getNamedValue(MF.getName()))
9143 .addImm(0));
9144 return It;
9145 }
9146
9147 // Are we saving the link register?
9148 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
9149 C.CallConstructionID == MachineOutlinerThunk) {
9150 // No, so just insert the call.
9151 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9152 .addGlobalAddress(M.getNamedValue(MF.getName())));
9153 return It;
9154 }
9155
9156 // We want to return the spot where we inserted the call.
9157 MachineBasicBlock::iterator CallPt;
9158
9159 // Instructions for saving and restoring LR around the call instruction we're
9160 // going to insert.
9161 MachineInstr *Save;
9162 MachineInstr *Restore;
9163 // Can we save to a register?
9164 if (C.CallConstructionID == MachineOutlinerRegSave) {
9165 // FIXME: This logic should be sunk into a target-specific interface so that
9166 // we don't have to recompute the register.
9167 Register Reg = findRegisterToSaveLRTo(C);
9168 assert(Reg && "No callee-saved register available?");
9169
9170 // LR has to be a live in so that we can save it.
9171 if (!MBB.isLiveIn(AArch64::LR))
9172 MBB.addLiveIn(AArch64::LR);
9173
9174 // Save and restore LR from Reg.
9175 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
9176 .addReg(AArch64::XZR)
9177 .addReg(AArch64::LR)
9178 .addImm(0);
9179 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
9180 .addReg(AArch64::XZR)
9181 .addReg(Reg)
9182 .addImm(0);
9183 } else {
9184 // We have the default case. Save and restore from SP.
9185 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9186 .addReg(AArch64::SP, RegState::Define)
9187 .addReg(AArch64::LR)
9188 .addReg(AArch64::SP)
9189 .addImm(-16);
9190 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9191 .addReg(AArch64::SP, RegState::Define)
9192 .addReg(AArch64::LR, RegState::Define)
9193 .addReg(AArch64::SP)
9194 .addImm(16);
9195 }
9196
9197 It = MBB.insert(It, Save);
9198 It++;
9199
9200 // Insert the call.
9201 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9202 .addGlobalAddress(M.getNamedValue(MF.getName())));
9203 CallPt = It;
9204 It++;
9205
9206 It = MBB.insert(It, Restore);
9207 return CallPt;
9208}
9209
9210 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
9211 MachineFunction &MF) const {
9212 return MF.getFunction().hasMinSize();
9213}
9214
9215 void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
9216 MachineBasicBlock::iterator Iter,
9217 DebugLoc &DL,
9218 bool AllowSideEffects) const {
9219 const MachineFunction &MF = *MBB.getParent();
9220 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
9221 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
9222
9223 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
9224 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
9225 } else if (STI.hasSVE()) {
9226 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
9227 .addImm(0)
9228 .addImm(0);
9229 } else {
9230 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
9231 .addImm(0);
9232 }
9233}
9234
9235std::optional<DestSourcePair>
9236 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
9237
9238 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
9239 // and zero immediate operands used as an alias for mov instruction.
9240 if (MI.getOpcode() == AArch64::ORRWrs &&
9241 MI.getOperand(1).getReg() == AArch64::WZR &&
9242 MI.getOperand(3).getImm() == 0x0 &&
9243 // Check that the w->w move is not a zero-extending w->x mov.
9244 (!MI.getOperand(0).getReg().isVirtual() ||
9245 MI.getOperand(0).getSubReg() == 0) &&
9246 (!MI.getOperand(0).getReg().isPhysical() ||
9247 MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
9248 AArch64::X0,
9249 /*TRI=*/nullptr) == -1))
9250 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9251
9252 if (MI.getOpcode() == AArch64::ORRXrs &&
9253 MI.getOperand(1).getReg() == AArch64::XZR &&
9254 MI.getOperand(3).getImm() == 0x0)
9255 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9256
9257 return std::nullopt;
9258}
9259
9260std::optional<DestSourcePair>
9261 AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
9262 if (MI.getOpcode() == AArch64::ORRWrs &&
9263 MI.getOperand(1).getReg() == AArch64::WZR &&
9264 MI.getOperand(3).getImm() == 0x0)
9265 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9266 return std::nullopt;
9267}
9268
9269std::optional<RegImmPair>
9270 AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
9271 int Sign = 1;
9272 int64_t Offset = 0;
9273
9274 // TODO: Handle cases where Reg is a super- or sub-register of the
9275 // destination register.
9276 const MachineOperand &Op0 = MI.getOperand(0);
9277 if (!Op0.isReg() || Reg != Op0.getReg())
9278 return std::nullopt;
9279
9280 switch (MI.getOpcode()) {
9281 default:
9282 return std::nullopt;
9283 case AArch64::SUBWri:
9284 case AArch64::SUBXri:
9285 case AArch64::SUBSWri:
9286 case AArch64::SUBSXri:
9287 Sign *= -1;
9288 [[fallthrough]];
9289 case AArch64::ADDSWri:
9290 case AArch64::ADDSXri:
9291 case AArch64::ADDWri:
9292 case AArch64::ADDXri: {
9293 // TODO: Third operand can be global address (usually some string).
9294 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
9295 !MI.getOperand(2).isImm())
9296 return std::nullopt;
9297 int Shift = MI.getOperand(3).getImm();
9298 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
9299 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
9300 }
9301 }
9302 return RegImmPair{MI.getOperand(1).getReg(), Offset};
9303}
9304
9305/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
9306/// the destination register then, if possible, describe the value in terms of
9307/// the source register.
9308static std::optional<ParamLoadedValue>
9310 const TargetInstrInfo *TII,
9311 const TargetRegisterInfo *TRI) {
9312 auto DestSrc = TII->isCopyLikeInstr(MI);
9313 if (!DestSrc)
9314 return std::nullopt;
9315
9316 Register DestReg = DestSrc->Destination->getReg();
9317 Register SrcReg = DestSrc->Source->getReg();
9318
9319 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
9320
9321 // If the described register is the destination, just return the source.
9322 if (DestReg == DescribedReg)
9323 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9324
9325 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
9326 if (MI.getOpcode() == AArch64::ORRWrs &&
9327 TRI->isSuperRegister(DestReg, DescribedReg))
9328 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9329
9330 // We may need to describe the lower part of a ORRXrs move.
9331 if (MI.getOpcode() == AArch64::ORRXrs &&
9332 TRI->isSubRegister(DestReg, DescribedReg)) {
9333 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
9334 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
9335 }
9336
9337 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
9338 "Unhandled ORR[XW]rs copy case");
9339
9340 return std::nullopt;
9341}
9342
9343 bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
9344 // Functions cannot be split to different sections on AArch64 if they have
9345 // a red zone. This is because relaxing a cross-section branch may require
9346 // incrementing the stack pointer to spill a register, which would overwrite
9347 // the red zone.
9348 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
9349 return false;
9350
9351 return TargetInstrInfo::isFunctionSafeToSplit(MF);
9352}
9353
9354 bool AArch64InstrInfo::isMBBSafeToSplitToCold(
9355 const MachineBasicBlock &MBB) const {
9356 // Asm Goto blocks can contain conditional branches to goto labels, which can
9357 // get moved out of range of the branch instruction.
9358 auto isAsmGoto = [](const MachineInstr &MI) {
9359 return MI.getOpcode() == AArch64::INLINEASM_BR;
9360 };
9361 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
9362 return false;
9363
9364 // Because jump tables are label-relative instead of table-relative, they all
9365 // must be in the same section or relocation fixup handling will fail.
9366
9367 // Check if MBB is a jump table target
9368 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
9369 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
9370 return llvm::is_contained(JTE.MBBs, &MBB);
9371 };
9372 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
9373 return false;
9374
9375 // Check if MBB contains a jump table lookup
9376 for (const MachineInstr &MI : MBB) {
9377 switch (MI.getOpcode()) {
9378 case TargetOpcode::G_BRJT:
9379 case AArch64::JumpTableDest32:
9380 case AArch64::JumpTableDest16:
9381 case AArch64::JumpTableDest8:
9382 return false;
9383 default:
9384 continue;
9385 }
9386 }
9387
9388 // MBB isn't a special case, so it's safe to be split to the cold section.
9389 return true;
9390}
9391
9392std::optional<ParamLoadedValue>
9394 Register Reg) const {
9395 const MachineFunction *MF = MI.getMF();
9396 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
9397 switch (MI.getOpcode()) {
9398 case AArch64::MOVZWi:
9399 case AArch64::MOVZXi: {
9400 // MOVZWi may be used for producing zero-extended 32-bit immediates in
9401 // 64-bit parameters, so we need to consider super-registers.
9402 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
9403 return std::nullopt;
9404
9405 if (!MI.getOperand(1).isImm())
9406 return std::nullopt;
9407 int64_t Immediate = MI.getOperand(1).getImm();
9408 int Shift = MI.getOperand(2).getImm();
9409 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
9410 nullptr);
9411 }
9412 case AArch64::ORRWrs:
9413 case AArch64::ORRXrs:
9414 return describeORRLoadedValue(MI, Reg, this, TRI);
9415 }
9416
9417 return TargetInstrInfo::describeLoadedValue(MI, Reg);
9418}
9419
9420 bool AArch64InstrInfo::isExtendLikelyToBeFolded(
9421 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
9422 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
9423 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
9424 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
9425
9426 // Anyexts are nops.
9427 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
9428 return true;
9429
9430 Register DefReg = ExtMI.getOperand(0).getReg();
9431 if (!MRI.hasOneNonDBGUse(DefReg))
9432 return false;
9433
9434 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
9435 // addressing mode.
9436 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
9437 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
9438}
9439
9440 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
9441 return get(Opc).TSFlags & AArch64::ElementSizeMask;
9442}
9443
9444bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
9445 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
9446}
9447
9448bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
9449 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
9450}
9451
9452unsigned int
9453 AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
9454 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
9455}
9456
9457bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
9458 unsigned Scale) const {
9459 if (Offset && Scale)
9460 return false;
9461
9462 // Check Reg + Imm
9463 if (!Scale) {
9464 // 9-bit signed offset
9465 if (isInt<9>(Offset))
9466 return true;
9467
9468 // 12-bit unsigned offset
9469 unsigned Shift = Log2_64(NumBytes);
9470 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
9471 // Must be a multiple of NumBytes (NumBytes is a power of 2)
9472 (Offset >> Shift) << Shift == Offset)
9473 return true;
9474 return false;
9475 }
9476
9477 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
9478 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
9479}
9480
9481 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
9482 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
9483 return AArch64::BLRNoIP;
9484 else
9485 return AArch64::BLR;
9486}
9487
9488 MachineBasicBlock::iterator
9489 AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
9490 Register TargetReg, bool FrameSetup) const {
9491 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
9492
9493 MachineBasicBlock &MBB = *MBBI->getParent();
9494 MachineFunction &MF = *MBB.getParent();
9495 const AArch64InstrInfo *TII =
9496 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
9497 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
9498 DebugLoc DL = MBB.findDebugLoc(MBBI);
9499
9500 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
9501 MachineBasicBlock *LoopTestMBB =
9502 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9503 MF.insert(MBBInsertPoint, LoopTestMBB);
9504 MachineBasicBlock *LoopBodyMBB =
9505 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9506 MF.insert(MBBInsertPoint, LoopBodyMBB);
9507 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9508 MF.insert(MBBInsertPoint, ExitMBB);
9509 MachineInstr::MIFlag Flags =
9510 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
9511
9512 // LoopTest:
9513 // SUB SP, SP, #ProbeSize
9514 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
9515 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
9516
9517 // CMP SP, TargetReg
9518 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
9519 AArch64::XZR)
9520 .addReg(AArch64::SP)
9521 .addReg(TargetReg)
9522 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
9523 .setMIFlags(Flags);
9524
9525 // B.<Cond> LoopExit
9526 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
9527 .addImm(AArch64CC::LE)
9528 .addMBB(ExitMBB)
9529 .setMIFlags(Flags);
9530
9531 // STR XZR, [SP]
9532 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
9533 .addReg(AArch64::XZR)
9534 .addReg(AArch64::SP)
9535 .addImm(0)
9536 .setMIFlags(Flags);
9537
9538 // B loop
9539 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
9540 .addMBB(LoopTestMBB)
9541 .setMIFlags(Flags);
9542
9543 // LoopExit:
9544 // MOV SP, TargetReg
9545 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
9546 .addReg(TargetReg)
9547 .addImm(0)
9548 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
9549 .setMIFlags(Flags);
9550
9551 // LDR XZR, [SP]
9552 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
9553 .addReg(AArch64::XZR, RegState::Define)
9554 .addReg(AArch64::SP)
9555 .addImm(0)
9556 .setMIFlags(Flags);
9557
9558 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
9559 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
9560
9561 LoopTestMBB->addSuccessor(ExitMBB);
9562 LoopTestMBB->addSuccessor(LoopBodyMBB);
9563 LoopBodyMBB->addSuccessor(LoopTestMBB);
9564 MBB.addSuccessor(LoopTestMBB);
9565
9566 // Update liveins.
9567 if (MF.getRegInfo().reservedRegsFrozen())
9568 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
9569
9570 return ExitMBB->begin();
9571}
9572
9573namespace {
9574class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
9575 MachineInstr *PredBranch;
9576 SmallVector<MachineOperand, 4> Cond;
9577
9578public:
9579 AArch64PipelinerLoopInfo(MachineInstr *PredBranch,
9580 const SmallVectorImpl<MachineOperand> &Cond)
9581 : PredBranch(PredBranch), Cond(Cond.begin(), Cond.end()) {}
9582
9583 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
9584 // Make the instructions for loop control be placed in stage 0.
9585 // The predecessors of PredBranch are considered by the caller.
9586 return MI == PredBranch;
9587 }
9588
9589 std::optional<bool> createTripCountGreaterCondition(
9590 int TC, MachineBasicBlock &MBB,
9591 SmallVectorImpl<MachineOperand> &CondParam) override {
9592 // A branch instruction will be inserted as "if (Cond) goto epilogue".
9593 // Cond is normalized for such use.
9594 // The predecessors of the branch are assumed to have already been inserted.
9595 CondParam = Cond;
9596 return {};
9597 }
9598
9599 void setPreheader(MachineBasicBlock *NewPreheader) override {}
9600
9601 void adjustTripCount(int TripCountAdjust) override {}
9602
9603 void disposed() override {}
9604};
9605} // namespace
9606
9607static bool isCompareAndBranch(unsigned Opcode) {
9608 switch (Opcode) {
9609 case AArch64::CBZW:
9610 case AArch64::CBZX:
9611 case AArch64::CBNZW:
9612 case AArch64::CBNZX:
9613 case AArch64::TBZW:
9614 case AArch64::TBZX:
9615 case AArch64::TBNZW:
9616 case AArch64::TBNZX:
9617 return true;
9618 }
9619 return false;
9620}
9621
9622std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
9623 AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
9624 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
9625 SmallVector<MachineOperand, 4> Cond;
9626 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
9627 return nullptr;
9628
9629 // Infinite loops are not supported
9630 if (TBB == LoopBB && FBB == LoopBB)
9631 return nullptr;
9632
9633 // Must be conditional branch
9634 if (FBB == nullptr)
9635 return nullptr;
9636
9637 assert((TBB == LoopBB || FBB == LoopBB) &&
9638 "The Loop must be a single-basic-block loop");
9639
9640 // Normalization for createTripCountGreaterCondition()
9641 if (TBB == LoopBB)
9642 reverseBranchCondition(Cond);
9643
9644 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
9645 const TargetRegisterInfo &TRI = getRegisterInfo();
9646
9647 // Find the immediate predecessor of the conditional branch
9648 MachineInstr *PredBranch = nullptr;
9649 if (CondBranch->getOpcode() == AArch64::Bcc) {
9650 for (MachineInstr &MI : reverse(*LoopBB)) {
9651 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
9652 PredBranch = &MI;
9653 break;
9654 }
9655 }
9656 if (!PredBranch)
9657 return nullptr;
9658 } else if (isCompareAndBranch(CondBranch->getOpcode())) {
9659 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
9660 Register Reg = CondBranch->getOperand(0).getReg();
9661 if (!Reg.isVirtual())
9662 return nullptr;
9663 PredBranch = MRI.getVRegDef(Reg);
9664
9665 // MachinePipeliner does not expect that the immediate predecessor is a Phi
9666 if (PredBranch->isPHI())
9667 return nullptr;
9668
9669 if (PredBranch->getParent() != LoopBB)
9670 return nullptr;
9671 } else {
9672 return nullptr;
9673 }
9674
9675 return std::make_unique<AArch64PipelinerLoopInfo>(PredBranch, Cond);
9676}
9677
9678#define GET_INSTRINFO_HELPERS
9679#define GET_INSTRMAP_INFO
9680#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCompareAndBranch(unsigned Opcode)
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
@ AK_Write
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static unsigned getBranchDisplacementBits(unsigned Opc)
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static void appendVGScaledOffsetExpr(SmallVectorImpl< char > &Expr, int NumBytes, int NumVGScaledBytes, unsigned VG, llvm::raw_string_ostream &Comment)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ HasCalls
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
const SetOfInstructions & getLOHRelated() const
bool needsDwarfUnwindInfo(const MachineFunction &MF) const
void setOutliningStyle(std::string Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, int64_t &NumDataVectors)
Returns the offset in parts to which this frame offset can be decomposed for the purpose of describin...
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
uint64_t getElementSizeForOpcode(unsigned Opc) const
Returns the vector element size (B, H, S or D) of an SVE opcode.
outliner::InstrType getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool isWhileOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE WHILE## instruction.
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
static bool isSEHInstruction(const MachineInstr &MI)
Return true if the instructions is a SEH instruciton used for unwinding on Windows.
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
SmallVector< std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > > getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool useMachineCombiner() const override
AArch64 supports MachineCombiner.
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const override
static bool isFalkorShiftExtFast(const MachineInstr &MI)
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
bool expandPostRAPseudo(MachineInstr &MI) const override
unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
static bool isFpOrNEON(const MachineInstr &MI)
Returns whether the instruction is FP or NEON.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
bool isFunctionSafeToSplit(const MachineFunction &MF) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
Return true when Inst is associative and commutative so that it can be reassociated.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
std::optional< outliner::OutlinedFunction > getOutliningCandidateInfo(std::vector< outliner::Candidate > &RepeatedSequenceLocs) const override
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset, unsigned Scale) const
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isPTestLikeOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE instruction that sets the condition codes as if it's results...
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized)
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
const AArch64RegisterInfo * getRegisterInfo() const override
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:681
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:678
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:799
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:583
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:556
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:541
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:647
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Definition: MCInstBuilder.h:43
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:81
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MBBSectionID getSectionID() const
Returns the section ID of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
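A minimal sketch (not taken from this file) of how the MachineInstrBuilder methods above are typically chained; the opcode, registers, and helper name emitAddImm are illustrative, and the includes assume an in-tree AArch64 backend build.
  #include "AArch64InstrInfo.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  using namespace llvm;

  // Emit "ADD DestReg, SrcReg, #16" before MBBI; DL and TII are assumed to come
  // from the surrounding pass.
  static void emitAddImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                         const DebugLoc &DL, const TargetInstrInfo *TII,
                         Register DestReg, Register SrcReg) {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), DestReg)
        .addReg(SrcReg)
        .addImm(16)   // imm12 field
        .addImm(0);   // shift field: LSL #0
  }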
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:558
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:341
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:391
uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isFullCopy() const
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:771
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:487
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:386
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated with IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
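A rough sketch (not from this file) of the MachineOperand accessors above; the helper name forceImmOperand is hypothetical, and ChangeToImmediate is the standard MachineOperand retyping call (the operand must not be tied).
  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineOperand.h"
  using namespace llvm;

  // If operand OpIdx of MI is already an immediate, update it; if it is a
  // register, rewrite it in place to the immediate Val.
  static void forceImmOperand(MachineInstr &MI, unsigned OpIdx, int64_t Val) {
    MachineOperand &MO = MI.getOperand(OpIdx);
    if (MO.isImm()) {
      MO.setImm(Val);
      return;
    }
    if (MO.isReg())
      MO.ChangeToImmediate(Val);
  }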
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
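A brief sketch (not from this file) of the virtual-register helpers above; the register classes and the helper name makeScratchVReg are illustrative, and the include assumes an in-tree AArch64 backend build.
  #include "AArch64RegisterInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  using namespace llvm;

  // Create a fresh 64-bit GPR virtual register and constrain an existing one.
  static Register makeScratchVReg(MachineRegisterInfo &MRI, Register Existing) {
    Register NewReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    // constrainRegClass returns nullptr when there is no common subclass.
    if (!MRI.constrainRegClass(Existing, &AArch64::GPR64spRegClass))
      return Register();  // caller must handle the failure
    return NewReg;
  }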
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return whether a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Represents a location in source code.
Definition: SMLoc.h:23
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
int64_t getScalable() const
Returns the scalable component of the stack.
Definition: TypeSize.h:52
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition: TypeSize.h:44
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
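A small sketch of how the StackOffset helpers above compose fixed and scalable parts; it assumes only StackOffset's arithmetic operators, which live alongside the class in TypeSize.h.
  #include "llvm/Support/TypeSize.h"
  using namespace llvm;

  // 32 fixed bytes plus 32 scalable (vscale-multiplied) bytes.
  StackOffset Total = StackOffset::getFixed(32) + StackOffset::getScalable(32);
  int64_t FixedPart    = Total.getFixed();     // 32
  int64_t ScalablePart = Total.getScalable();  // 32, scaled by vscale at runtime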
MI-level Statepoint operands.
Definition: StackMaps.h:158
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition: StackMaps.h:207
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:333
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - Get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
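An illustrative round trip (not from this file) through the AArch64_AM shift helpers above; the header path is the in-tree location of these helpers.
  #include "MCTargetDesc/AArch64AddressingModes.h"
  using namespace llvm;

  // Pack "LSL #12" into the immediate format used by shifted operands,
  // then pull the pieces back out.
  unsigned ShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
  AArch64_AM::ShiftExtendType Ty = AArch64_AM::getShiftType(ShiftImm);  // AArch64_AM::LSL
  unsigned Amount = AArch64_AM::getShiftValue(ShiftImm);                // 12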
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
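A hedged sketch of driving expandMOVImm as declared above; it assumes the AArch64_IMM namespace and ImmInsnModel record from AArch64ExpandImm.h, and does not rely on the record's exact layout.
  #include "AArch64ExpandImm.h"
  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;

  // Ask the immediate expander how a 64-bit constant would be materialized.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(0x0000FFFF0000FFFFULL, /*BitSize=*/64, Insn);
  // Insn now holds one entry per real move-immediate style instruction needed;
  // Insn.size() is the materialization cost in instructions.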
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static bool isCondBranchOpcode(int Opc)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
static bool isIndirectBranchOpcode(int Opc)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
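A sketch (assuming the surrounding pass already has the MachineInstr and a StackOffset in hand) of how the status values above are typically tested; the helper name offsetFitsInstruction is hypothetical.
  #include "AArch64InstrInfo.h"
  using namespace llvm;

  // Decide whether Offset can be folded directly into MI's addressing mode.
  static bool offsetFitsInstruction(const MachineInstr &MI, StackOffset Offset) {
    // isAArch64FrameOffsetLegal returns a bitmask of the AArch64FrameOffset*
    // status values declared above.
    int Status = isAArch64FrameOffsetLegal(MI, Offset);
    return (Status & AArch64FrameOffsetIsLegal) != 0;
  }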
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:280
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
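A tiny, self-contained illustration of the two MathExtras helpers above; the values are arbitrary.
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  using namespace llvm;

  void mathExtrasExample() {
    assert(isPowerOf2_64(4096) && "4 KiB is a power of two");
    assert(Log2_64(4096) == 12 && "floor(log2(4096)) == 12");
    assert(!isPowerOf2_64(0) && "zero is not a power of two");
  }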
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ MULADDXI_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ MULSUBXI_OP1
@ FMLAv4i32_indexed_OP1
@ MULADDWI_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv8i8_OP1
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ MULADDv8i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULSUBv8i8_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBWI_OP1
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
@ MULSUBv8i8_OP2
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
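A sketch of a typical call to emitFrameOffset as declared above; the 16-byte SP adjustment and the helper name allocateSixteenBytes are illustrative.
  #include "AArch64InstrInfo.h"
  using namespace llvm;

  // Materialize "SP = SP - 16" before MBBI, tagging the result as frame setup.
  static void allocateSixteenBytes(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   const DebugLoc &DL,
                                   const TargetInstrInfo *TII) {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-16), TII, MachineInstr::FrameSetup);
  }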
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:244
DWARFExpression::Operation Op
static bool isUncondBranchOpcode(int Opc)
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
static const MachineMemOperand::Flags MOSuppressPair
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
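A short, self-contained example of SignExtend64 from above; the values are arbitrary and chosen to resemble a 12-bit immediate field.
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  // Treat the low 12 bits of a value as a signed quantity.
  int64_t A = SignExtend64<12>(0xFFF);   // -1: all twelve bits set
  int64_t B = SignExtend64(0x800, 12);   // -2048: only the field's sign bit set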
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
static const MachineMemOperand::Flags MOStridedAccess
@ Default
The result values are uniform if and only if all operands are uniform.
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Description of the encoding of one expression Op.
Used to describe an addressing mode, similar to ExtAddrMode in CodeGenPrepare.
static const MBBSectionID ColdSectionID
MachineJumpTableEntry - One jump table in the jump table info.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.